Skip to content

Commit

Permalink
Merge pull request quantopian#1302 from quantopian/point-in-time-asset-db
Browse files Browse the repository at this point in the history

Point in time asset db
  • Loading branch information
llllllllll authored Jul 26, 2016
2 parents f445671 + b7bb6ca commit d6e3da9
Show file tree
Hide file tree
Showing 19 changed files with 1,327 additions and 619 deletions.
1 change: 1 addition & 0 deletions tests/data/test_us_equity_pricing.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
index=arange(1, 7),
columns=['start_date', 'end_date'],
).astype(datetime64)
EQUITY_INFO['symbol'] = [chr(ord('A') + n) for n in range(len(EQUITY_INFO))]

TEST_QUERY_ASSETS = EQUITY_INFO.index

Expand Down
1 change: 1 addition & 0 deletions tests/pipeline/test_us_equity_pricing_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@
index=arange(1, 7),
columns=['start_date', 'end_date'],
).astype(datetime64)
EQUITY_INFO['symbol'] = [chr(ord('A') + n) for n in range(len(EQUITY_INFO))]

TEST_QUERY_ASSETS = EQUITY_INFO.index

Expand Down
Binary file modified tests/resources/example_data.tar.gz
Binary file not shown.
10 changes: 9 additions & 1 deletion tests/test_algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,10 @@ class TestTransformAlgorithm(WithLogger,

@classmethod
def make_futures_info(cls):
return pd.DataFrame.from_dict({3: {'multiplier': 10}}, 'index')
return pd.DataFrame.from_dict(
{3: {'multiplier': 10, 'symbol': 'F'}},
orient='index',
)

@classmethod
def make_equity_daily_bar_data(cls):
Expand Down Expand Up @@ -985,6 +988,7 @@ def test_minute_data(self, algo_class):
'start_date': start_session,
'end_date': period_end + timedelta(days=1)
}] * 2)
equities['symbol'] = ['A', 'B']
with TempDirectory() as tempdir, \
tmp_trading_env(equities=equities) as env:
sim_params = SimulationParameters(
Expand Down Expand Up @@ -2813,6 +2817,7 @@ def test_set_max_order_count(self):
metadata = pd.DataFrame.from_dict(
{
1: {
'symbol': 'SYM',
'start_date': start,
'end_date': start + timedelta(days=6)
},
Expand Down Expand Up @@ -2940,6 +2945,7 @@ def handle_data(algo, data):

def test_asset_date_bounds(self):
metadata = pd.DataFrame([{
'symbol': 'SYM',
'start_date': self.sim_params.start_session,
'end_date': '2020-01-01',
}])
Expand All @@ -2959,6 +2965,7 @@ def test_asset_date_bounds(self):
algo.run(data_portal)

metadata = pd.DataFrame([{
'symbol': 'SYM',
'start_date': '1989-01-01',
'end_date': '1990-01-01',
}])
Expand All @@ -2979,6 +2986,7 @@ def test_asset_date_bounds(self):
algo.run(data_portal)

metadata = pd.DataFrame([{
'symbol': 'SYM',
'start_date': '2020-01-01',
'end_date': '2021-01-01',
}])
Expand Down
181 changes: 122 additions & 59 deletions tests/test_assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@
"""
from contextlib import contextmanager
from datetime import datetime, timedelta
from functools import partial
import pickle
import sys
from types import GetSetDescriptorType
from unittest import TestCase
import uuid
import warnings

from nose.tools import raises
from nose_parameterized import parameterized
from numpy import full, int32, int64
import pandas as pd
Expand All @@ -39,7 +39,6 @@
Future,
AssetDBWriter,
AssetFinder,
AssetFinderCachedEquities,
)
from zipline.assets.synthetic import (
make_commodity_future_info,
Expand Down Expand Up @@ -341,7 +340,6 @@ def test_repr(self):
self.assertIn("tick_size=0.01", reprd)
self.assertIn("multiplier=500", reprd)

@raises(AssertionError)
def test_reduce(self):
assert_equal(
pickle.loads(pickle.dumps(self.future)).to_dict(),
Expand Down Expand Up @@ -485,6 +483,97 @@ def test_lookup_symbol_fuzzy(self):
self.assertEqual(2, finder.lookup_symbol('BRK_A', None, fuzzy=True))
self.assertEqual(2, finder.lookup_symbol('BRK_A', dt, fuzzy=True))

def test_lookup_symbol_change_ticker(self):
    """A ticker symbol may be reassigned to a different sid over time.

    Writes two sids whose symbol histories interleave: sid 0 holds 'A'
    then 'B'; sid 1 holds 'C' then claims 'A' once sid 0 has released
    it.  Verifies that ``lookup_symbol`` resolves each symbol to the sid
    that owned it *as of* the query date, raises ``SymbolNotFound``
    before any owner exists, and forward-fills to the last owner after
    the final ``end_date``.
    """
    # Shorthand for building tz-aware timestamps.
    T = partial(pd.Timestamp, tz='utc')
    metadata = pd.DataFrame.from_records(
        [
            # sid 0
            {
                'symbol': 'A',
                'start_date': T('2014-01-01'),
                'end_date': T('2014-01-05'),
            },
            {
                'symbol': 'B',
                'start_date': T('2014-01-06'),
                'end_date': T('2014-01-10'),
            },

            # sid 1
            {
                'symbol': 'C',
                'start_date': T('2014-01-01'),
                'end_date': T('2014-01-05'),
            },
            {
                'symbol': 'A',  # claiming the unused symbol 'A'
                'start_date': T('2014-01-06'),
                'end_date': T('2014-01-10'),
            },
        ],
        # Row index supplies the sid for each ownership record; repeated
        # values give one sid multiple symbol-ownership periods.
        index=[0, 0, 1, 1],
    )
    self.write_assets(equities=metadata)
    finder = self.asset_finder

    # note: these assertions walk forward in time, starting at assertions
    # about ownership before the start_date and ending with assertions
    # after the end_date; new assertions should be inserted in the correct
    # locations

    # no one held 'A' before 01
    with self.assertRaises(SymbolNotFound):
        finder.lookup_symbol('A', T('2013-12-31'))

    # no one held 'C' before 01
    with self.assertRaises(SymbolNotFound):
        finder.lookup_symbol('C', T('2013-12-31'))

    for asof in pd.date_range('2014-01-01', '2014-01-05', tz='utc'):
        # from 01 through 05 sid 0 held 'A'
        assert_equal(
            finder.lookup_symbol('A', asof),
            finder.retrieve_asset(0),
            msg=str(asof),
        )

        # from 01 through 05 sid 1 held 'C'
        assert_equal(
            finder.lookup_symbol('C', asof),
            finder.retrieve_asset(1),
            msg=str(asof),
        )

    # no one held 'B' before 06
    with self.assertRaises(SymbolNotFound):
        finder.lookup_symbol('B', T('2014-01-05'))

    # no one held 'C' after 06, however, no one has claimed it yet
    # so it still maps to sid 1
    assert_equal(
        finder.lookup_symbol('C', T('2014-01-07')),
        finder.retrieve_asset(1),
    )

    for asof in pd.date_range('2014-01-06', '2014-01-11', tz='utc'):
        # from 06 through 10 sid 0 held 'B'
        # we test through the 11th because sid 1 is the last to hold 'B'
        # so it should ffill
        assert_equal(
            finder.lookup_symbol('B', asof),
            finder.retrieve_asset(0),
            msg=str(asof),
        )

        # from 06 through 10 sid 1 held 'A'
        # we test through the 11th because sid 1 is the last to hold 'A'
        # so it should ffill
        assert_equal(
            finder.lookup_symbol('A', asof),
            finder.retrieve_asset(1),
            msg=str(asof),
        )

def test_lookup_symbol(self):

# Incrementing by two so that start and end dates for each
Expand Down Expand Up @@ -519,27 +608,7 @@ def test_lookup_symbol(self):
self.assertEqual(result.symbol, 'EXISTING')
self.assertEqual(result.sid, i)

def test_lookup_symbol_from_multiple_valid(self):
# This test asserts that we resolve conflicts in accordance with the
# following rules when we have multiple assets holding the same symbol
# at the same time:

# If multiple SIDs exist for symbol S at time T, return the candidate
# SID whose start_date is highest. (200 cases)

# If multiple SIDs exist for symbol S at time T, the best candidate
# SIDs share the highest start_date, return the SID with the highest
# end_date. (34 cases)

# It is the opinion of the author (ssanderson) that we should consider
# this malformed input and fail here. But this is the current indended
# behavior of the code, and I accidentally broke it while refactoring.
# These will serve as regression tests until the time comes that we
# decide to enforce this as an error.

# See https://github.com/quantopian/zipline/issues/837 for more
# details.

def test_fail_to_write_overlapping_data(self):
df = pd.DataFrame.from_records(
[
{
Expand Down Expand Up @@ -568,22 +637,16 @@ def test_lookup_symbol_from_multiple_valid(self):
]
)

self.write_assets(equities=df)

def check(expected_sid, date):
result = self.asset_finder.lookup_symbol(
'MULTIPLE', date,
)
self.assertEqual(result.symbol, 'MULTIPLE')
self.assertEqual(result.sid, expected_sid)
with self.assertRaises(ValueError) as e:
self.write_assets(equities=df)

# Sids 1 and 2 are eligible here. We should get asset 2 because it
# has the later end_date.
check(2, pd.Timestamp('2010-12-31'))

# Sids 1, 2, and 3 are eligible here. We should get sid 3 because
# it has a later start_date
check(3, pd.Timestamp('2011-01-01'))
self.assertEqual(
str(e.exception),
"Ambiguous ownership of 'MULTIPLE', multiple companies held this"
" ticker over the following ranges:\n"
"[('2010-01-01 00:00:00', '2012-01-01 00:00:00'),"
" ('2011-01-01 00:00:00', '2012-01-01 00:00:00')]",
)

def test_lookup_generic(self):
"""
Expand Down Expand Up @@ -1000,14 +1063,6 @@ def test_error_message_plurality(self,
)


class AssetFinderCachedEquitiesTestCase(AssetFinderTestCase):
asset_finder_type = AssetFinderCachedEquities

def write_assets(self, **kwargs):
super(AssetFinderCachedEquitiesTestCase, self).write_assets(**kwargs)
self.asset_finder.rehash_equities()


class TestFutureChain(WithAssetFinder, ZiplineTestCase):
@classmethod
def make_futures_info(cls):
Expand Down Expand Up @@ -1259,15 +1314,23 @@ def test_check_version(self):
version_table = self.metadata.tables['version_info']

# This should not raise an error
check_version_info(version_table, ASSET_DB_VERSION)
check_version_info(self.engine, version_table, ASSET_DB_VERSION)

# This should fail because the version is too low
with self.assertRaises(AssetDBVersionError):
check_version_info(version_table, ASSET_DB_VERSION - 1)
check_version_info(
self.engine,
version_table,
ASSET_DB_VERSION - 1,
)

# This should fail because the version is too high
with self.assertRaises(AssetDBVersionError):
check_version_info(version_table, ASSET_DB_VERSION + 1)
check_version_info(
self.engine,
version_table,
ASSET_DB_VERSION + 1,
)

def test_write_version(self):
version_table = self.metadata.tables['version_info']
Expand All @@ -1279,33 +1342,33 @@ def test_write_version(self):
# This should fail because the table has no version info and is,
# therefore, consdered v0
with self.assertRaises(AssetDBVersionError):
check_version_info(version_table, -2)
check_version_info(self.engine, version_table, -2)

# This should not raise an error because the version has been written
write_version_info(version_table, -2)
check_version_info(version_table, -2)
write_version_info(self.engine, version_table, -2)
check_version_info(self.engine, version_table, -2)

# Assert that the version is in the table and correct
self.assertEqual(sa.select((version_table.c.version,)).scalar(), -2)

# Assert that trying to overwrite the version fails
with self.assertRaises(sa.exc.IntegrityError):
write_version_info(version_table, -3)
write_version_info(self.engine, version_table, -3)

def test_finder_checks_version(self):
version_table = self.metadata.tables['version_info']
version_table.delete().execute()
write_version_info(version_table, -2)
check_version_info(version_table, -2)
write_version_info(self.engine, version_table, -2)
check_version_info(self.engine, version_table, -2)

# Assert that trying to build a finder with a bad db raises an error
with self.assertRaises(AssetDBVersionError):
AssetFinder(engine=self.engine)

# Change the version number of the db to the correct version
version_table.delete().execute()
write_version_info(version_table, ASSET_DB_VERSION)
check_version_info(version_table, ASSET_DB_VERSION)
write_version_info(self.engine, version_table, ASSET_DB_VERSION)
check_version_info(self.engine, version_table, ASSET_DB_VERSION)

# Now that the versions match, this Finder should succeed
AssetFinder(engine=self.engine)
Expand All @@ -1319,7 +1382,7 @@ def test_downgrade(self):
metadata = sa.MetaData(conn)
metadata.reflect(bind=self.engine)
version_table = metadata.tables['version_info']
check_version_info(version_table, 0)
check_version_info(self.engine, version_table, 0)

# Check some of the v1-to-v0 downgrades
self.assertTrue('futures_contracts' in metadata.tables)
Expand Down
20 changes: 12 additions & 8 deletions tests/test_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,24 @@ def make_equity_info(cls):
return pd.DataFrame.from_dict(
{
1: {
"start_date": cls.START_DATE,
"end_date": cls.END_DATE + pd.Timedelta(days=1)
'symbol': 'A',
'start_date': cls.START_DATE,
'end_date': cls.END_DATE + pd.Timedelta(days=1)
},
2: {
"start_date": cls.START_DATE,
"end_date": cls.END_DATE + pd.Timedelta(days=1)
'symbol': 'B',
'start_date': cls.START_DATE,
'end_date': cls.END_DATE + pd.Timedelta(days=1)
},
3: {
"start_date": pd.Timestamp('2006-05-26', tz='utc'),
"end_date": pd.Timestamp('2006-08-09', tz='utc')
'symbol': 'C',
'start_date': pd.Timestamp('2006-05-26', tz='utc'),
'end_date': pd.Timestamp('2006-08-09', tz='utc')
},
4: {
"start_date": cls.START_DATE,
"end_date": cls.END_DATE + pd.Timedelta(days=1)
'symbol': 'D',
'start_date': cls.START_DATE,
'end_date': cls.END_DATE + pd.Timedelta(days=1)
},
},
orient='index',
Expand Down
3 changes: 3 additions & 0 deletions zipline/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,9 @@ def bundles():
"""List all of the available data bundles.
"""
for bundle in sorted(bundles_module.bundles.keys()):
if bundle.startswith('.'):
# hide the test data
continue
try:
ingestions = sorted(
(str(bundles_module.from_bundle_ingest_dirname(ing))
Expand Down
Loading

0 comments on commit d6e3da9

Please sign in to comment.