Skip to content

Commit

Permalink
ENH: Add writer for minute bcolz format.
Browse files Browse the repository at this point in the history
Implement a writer for minute data into a format comprised of multiple
ctables, one for each individual asset, with a common 'index' shared by
all ctables where a given dt maps to the same array index for all
equities and fields.

This format is pulled from the lazy-mainline/Q2.0 branch, with some
changes to the interface.

Add basic retrieval of values at a given dt to reader. Not yet used by
Zipline simulations, but added to support unit tests.

Also, rename stubbed out us_equity_minutes to minute_bars, since the
writer can be agnostic to asset type.
  • Loading branch information
Eddie Hebert committed Jan 21, 2016
1 parent 3b76981 commit d5c3b5a
Show file tree
Hide file tree
Showing 4 changed files with 866 additions and 93 deletions.
6 changes: 6 additions & 0 deletions docs/source/appendix.rst
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,9 @@ Asset Metadata

.. autoclass:: zipline.assets.assets.AssetConvertible
:members:

Data API
~~~~~~~~

.. autoclass:: zipline.data.minute_bars.BcolzMinuteBarWriter
:members:
313 changes: 313 additions & 0 deletions tests/data/test_minute_bars.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,313 @@
#
# Copyright 2016 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from datetime import timedelta
import os

from unittest import TestCase

from numpy import nan
from numpy.testing import assert_almost_equal
from pandas import (
DataFrame,
DatetimeIndex,
Timestamp,
)
from testfixtures import TempDirectory

from zipline.data.minute_bars import (
BcolzMinuteBarWriter,
BcolzMinuteBarReader,
BcolzMinuteOverlappingData,
US_EQUITIES_MINUTES_PER_DAY,
)
from zipline.finance.trading import TradingEnvironment


# Bounds of the calendar window used by these tests: setUpClass slices the
# market opens to this range, and the start is also passed to the writer.
TEST_CALENDAR_START = Timestamp('2015-06-01', tz='UTC')
TEST_CALENDAR_STOP = Timestamp('2015-06-30', tz='UTC')


class BcolzMinuteBarTestCase(TestCase):
    """Round-trip tests for the bcolz minute bar writer and reader.

    Each test writes one or more OHLCV bars for a single sid through
    ``BcolzMinuteBarWriter.write`` and then reads every field back with
    ``BcolzMinuteBarReader.get_value`` to check the stored values.
    """

    # Field names, in the order in which bars are built and read back.
    _FIELDS = ('open', 'high', 'low', 'close', 'volume')

    @classmethod
    def setUpClass(cls):
        """Build the market opens restricted to the test calendar window."""
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START,
            end=TEST_CALENDAR_STOP
        )
        cls.market_opens = all_market_opens[indexer]
        # First and last calendar entries that fall inside the window.
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        """Create a fresh temp directory with a writer and reader on it."""
        self.dir_ = TempDirectory()
        # NOTE(review): TempDirectory() may already create the directory;
        # confirm whether this explicit create() call is still needed.
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        """Remove the temp directory created in setUp."""
        self.dir_.cleanup()

    def _make_frame(self, index, bars):
        """Return an OHLCV DataFrame with one row per entry of ``bars``.

        ``bars`` is a sequence of ``(open, high, low, close, volume)``
        tuples and ``index`` holds the minute label for each row.
        """
        columns = zip(*bars)
        data = dict(
            (field, list(values))
            for field, values in zip(self._FIELDS, columns)
        )
        return DataFrame(data=data, index=index)

    def _assert_bar(self, sid, minute, expected):
        """Assert the reader returns ``expected`` for ``sid`` at ``minute``.

        ``expected`` is an ``(open, high, low, close, volume)`` tuple; the
        fields are checked one at a time, in that order.
        """
        for field, want in zip(self._FIELDS, expected):
            got = self.reader.get_value(sid, minute, field)
            self.assertEqual(
                want,
                got,
                '%s: expected %r, got %r' % (field, want, got),
            )

    def test_write_one_ohlcv(self):
        """A single bar at the first market open is read back unchanged."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        bar = (10.0, 20.0, 30.0, 40.0, 50.0)
        self.writer.write(sid, self._make_frame([minute], [bar]))

        self._assert_bar(sid, minute, bar)

    def test_write_two_bars(self):
        """Two consecutive minutes are stored and read independently."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        bar_0 = (10.0, 20.0, 30.0, 40.0, 50.0)
        bar_1 = (11.0, 21.0, 31.0, 41.0, 51.0)
        self.writer.write(
            sid,
            self._make_frame([minute_0, minute_1], [bar_0, bar_1]),
        )

        self._assert_bar(sid, minute_0, bar_0)
        self._assert_bar(sid, minute_1, bar_1)

    def test_write_on_second_day(self):
        """A bar whose first minute is not on the first day is readable."""
        # NOTE(review): integer addition on the Timestamp presumably steps
        # by the calendar index frequency -- confirm.
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        bar = (10.0, 20.0, 30.0, 40.0, 50.0)
        self.writer.write(sid, self._make_frame([minute], [bar]))

        self._assert_bar(sid, minute, bar)

    def test_write_empty(self):
        """An all-zero bar reads back as NaN prices and zero volume."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        bar = (0, 0, 0, 0, 0)
        self.writer.write(sid, self._make_frame([minute], [bar]))

        # The test expects zero prices to come back as NaN ...
        for field in ('open', 'high', 'low', 'close'):
            value = self.reader.get_value(sid, minute, field)
            assert_almost_equal(nan, value, err_msg=field)

        # ... while a zero volume is expected to stay 0.
        volume = self.reader.get_value(sid, minute, 'volume')
        assert_almost_equal(0, volume, err_msg='volume')

    def test_write_on_multiple_days(self):
        """Bars written on two different days are both readable."""
        tds = self.market_opens.index
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + 1,
            end=self.test_calendar_start + 3
        )]
        # One bar per day, at different offsets from each day's open.
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        bar_0 = (10.0, 20.0, 30.0, 40.0, 50.0)
        bar_1 = (11.0, 21.0, 31.0, 41.0, 51.0)
        self.writer.write(sid, self._make_frame(minutes, [bar_0, bar_1]))

        self._assert_bar(sid, minutes[0], bar_0)
        self._assert_bar(sid, minutes[1], bar_1)

    def test_no_overwrite(self):
        """Writing the same bar twice raises BcolzMinuteOverlappingData."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        bar = (10.0, 20.0, 30.0, 40.0, 50.0)
        data = self._make_frame([minute], [bar])
        self.writer.write(sid, data)

        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write(sid, data)
Loading

0 comments on commit d5c3b5a

Please sign in to comment.