Skip to content

Commit

Permalink
BLD: improved the bundle test suite and related adjustments
Browse files Browse the repository at this point in the history
  • Loading branch information
fredfortier committed Jan 18, 2018
1 parent 563fc43 commit 51126fd
Show file tree
Hide file tree
Showing 11 changed files with 409 additions and 326 deletions.
24 changes: 21 additions & 3 deletions catalyst/exchange/ccxt/ccxt_exchange.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@
UnsupportedHistoryFrequencyError
from catalyst.exchange.exchange_execution import ExchangeLimitOrder
from catalyst.exchange.utils.exchange_utils import mixin_market_params, \
from_ms_timestamp, get_epoch, get_exchange_folder, get_catalyst_symbol, \
get_exchange_folder, get_catalyst_symbol, \
get_exchange_auth
from exchange.utils.datetime_utils import from_ms_timestamp, get_epoch, \
get_periods_range
from catalyst.finance.order import Order, ORDER_STATUS
from catalyst.finance.transaction import Transaction

Expand Down Expand Up @@ -399,7 +401,7 @@ def get_frequency(timeframe, raise_error=True):
timeframe, source='ccxt', raise_error=raise_error
)

def get_candles(self, freq, assets, bar_count=None, start_dt=None,
def get_candles(self, freq, assets, bar_count=1, start_dt=None,
end_dt=None):
is_single = (isinstance(assets, TradingPair))
if is_single:
Expand All @@ -416,9 +418,25 @@ def get_candles(self, freq, assets, bar_count=None, start_dt=None,
freqs=freqs,
)

if start_dt is not None and end_dt is not None:
raise ValueError(
'Please provide either start_dt or end_dt, not both.'
)

elif end_dt is not None:
dt_range = get_periods_range(
end_dt=end_dt,
periods=bar_count,
freq=freq,
)
# skip the left bound of the range since the open range is
# on the right bound
start_dt = dt_range[1]

ms = None
if start_dt is not None:
delta = start_dt - get_epoch()
if end_dt is not None:
delta = start_dt - get_epoch()
ms = int(delta.total_seconds()) * 1000

candles = dict()
Expand Down
11 changes: 5 additions & 6 deletions catalyst/exchange/exchange.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
PricingDataNotLoadedError, \
NoDataAvailableOnExchange, NoValueForField, LastCandleTooEarlyError, \
TickerNotFoundError, NotEnoughCashError
from catalyst.exchange.utils.bundle_utils import get_start_dt, \
get_delta, get_periods, get_periods_range
from exchange.utils.datetime_utils import get_delta, get_periods_range, \
get_periods, get_start_dt, get_frequency
from catalyst.exchange.utils.exchange_utils import get_exchange_symbols, \
get_frequency, resample_history_df, has_bundle
resample_history_df, has_bundle
from logbook import Logger

log = Logger('Exchange', level=LOG_LEVEL)
Expand Down Expand Up @@ -433,7 +433,7 @@ def get_series_from_candles(self, candles, start_dt, end_dt,
series = pd.Series(values, index=dates)

periods = get_periods_range(
start_dt, end_dt, data_frequency
start_dt=start_dt, end_dt=end_dt, freq=data_frequency
)
# TODO: ensure that this working as expected, if not use fillna
series = series.reindex(
Expand Down Expand Up @@ -929,8 +929,7 @@ def cancel_order(self, order_param, symbol_or_asset=None):
pass

@abstractmethod
def get_candles(self, freq, assets, bar_count=None,
start_dt=None, end_dt=None):
def get_candles(self, freq, assets, bar_count, start_dt=None, end_dt=None):
"""
Retrieve OHLCV candles for the given assets
Expand Down
6 changes: 3 additions & 3 deletions catalyst/exchange/exchange_bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
NoDataAvailableOnExchange, \
PricingDataNotLoadedError, DataCorruptionError, PricingDataValueError
from catalyst.exchange.utils.bundle_utils import range_in_bundle, \
get_bcolz_chunk, get_month_start_end, \
get_year_start_end, get_df_from_arrays, get_start_dt, get_period_label, \
get_delta, get_assets
get_bcolz_chunk, get_df_from_arrays, get_assets
from exchange.utils.datetime_utils import get_delta, get_start_dt, \
get_period_label, get_month_start_end, get_year_start_end
from catalyst.exchange.utils.exchange_utils import get_exchange_folder, \
save_exchange_symbols, mixin_market_params, get_catalyst_symbol
from catalyst.utils.cli import maybe_show_progress
Expand Down
4 changes: 2 additions & 2 deletions catalyst/exchange/exchange_data_portal.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from catalyst.exchange.exchange_errors import (
ExchangeRequestError,
PricingDataNotLoadedError)
from catalyst.exchange.utils.exchange_utils import get_frequency, \
resample_history_df, group_assets_by_exchange
from catalyst.exchange.utils.exchange_utils import resample_history_df, group_assets_by_exchange
from exchange.utils.datetime_utils import get_frequency
from logbook import Logger
from redo import retry

Expand Down
225 changes: 13 additions & 212 deletions catalyst/exchange/utils/bundle_utils.py
Original file line number Diff line number Diff line change
@@ -1,51 +1,24 @@
import calendar
import os
import tarfile
from datetime import timedelta, datetime, date
from datetime import datetime

import numpy as np
import pandas as pd
import pytz

from catalyst.data.bundles.core import download_without_progress
from catalyst.exchange.utils.exchange_utils import get_exchange_bundles_folder
import os
import tarfile
from datetime import datetime

EXCHANGE_NAMES = ['bitfinex', 'bittrex', 'poloniex']
API_URL = 'http://data.enigma.co/api/v1'


def get_date_from_ms(ms):
    """
    Convert a millisecond epoch timestamp into a datetime.

    Parameters
    ----------
    ms: int
        Number of milliseconds elapsed since the epoch.

    Returns
    -------
    datetime
        The value produced by ``datetime.fromtimestamp`` called without
        a timezone argument, i.e. a naive datetime in local time.

    """
    seconds_since_epoch = ms / 1000.0
    return datetime.fromtimestamp(seconds_since_epoch)


def get_seconds_from_date(date):
"""
The number of seconds from the epoch.
Parameters
----------
date: datetime
Returns
-------
int
import numpy as np
import pandas as pd

"""
epoch = datetime.utcfromtimestamp(0)
epoch = epoch.replace(tzinfo=pytz.UTC)
from catalyst.data.bundles.core import download_without_progress
from catalyst.exchange.utils.exchange_utils import get_exchange_bundles_folder

return int((date - epoch).total_seconds())
EXCHANGE_NAMES = ['bitfinex', 'bittrex', 'poloniex']
API_URL = 'http://data.enigma.co/api/v1'


def get_bcolz_chunk(exchange_name, symbol, data_frequency, period):
Expand Down Expand Up @@ -77,8 +50,8 @@ def get_bcolz_chunk(exchange_name, symbol, data_frequency, period):
if not os.path.isdir(path):
url = 'https://s3.amazonaws.com/enigmaco/catalyst-bundles/' \
'exchange-{exchange}/{name}.tar.gz'.format(
exchange=exchange_name,
name=name)
exchange=exchange_name,
name=name)

bytes = download_without_progress(url)
with tarfile.open('r', fileobj=bytes) as tar:
Expand All @@ -87,178 +60,6 @@ def get_bcolz_chunk(exchange_name, symbol, data_frequency, period):
return path


def get_delta(periods, data_frequency):
    """
    Build the time span covered by a number of bars.

    Parameters
    ----------
    periods: int
        Number of bars.
    data_frequency: str
        'minute' counts the bars in minutes; any other value counts
        them in days.

    Returns
    -------
    timedelta

    """
    if data_frequency == 'minute':
        return timedelta(minutes=periods)

    # Every frequency other than 'minute' is measured in days.
    return timedelta(days=periods)


def get_periods_range(start_dt, end_dt, freq):
    """
    Get a date range for the specified parameters.

    Parameters
    ----------
    start_dt: datetime
        First timestamp of the range.
    end_dt: datetime
        Upper bound of the range.
    freq: str
        'minute' or 'daily'; any other value is handed to pandas
        unchanged as a frequency alias.

    Returns
    -------
    DatetimeIndex

    """
    if freq == 'minute':
        # 'min' is the long-standing pandas alias for minutely data; the
        # single-letter 'T' alias is deprecated in recent pandas releases.
        freq = 'min'

    elif freq == 'daily':
        freq = 'D'

    return pd.date_range(start_dt, end_dt, freq=freq)


def get_periods(start_dt, end_dt, freq):
    """
    Count the periods in the specified range.

    Parameters
    ----------
    start_dt: datetime
    end_dt: datetime
    freq: str

    Returns
    -------
    int
        Length of the index returned by ``get_periods_range`` for the
        same arguments.

    """
    period_index = get_periods_range(start_dt, end_dt, freq)
    return len(period_index)


def get_start_dt(end_dt, bar_count, data_frequency, include_first=True):
    """
    Derive the start date of a window that ends at ``end_dt``.

    Parameters
    ----------
    end_dt: datetime
    bar_count: int
    data_frequency: str
    include_first: bool
        When False, the computed start is moved forward by one period.
        Has no effect when ``bar_count`` is not greater than 1.

    Returns
    -------
    datetime

    """
    # A window of at most one bar starts where it ends.
    if not bar_count > 1:
        return end_dt

    window_start = end_dt - get_delta(bar_count, data_frequency)
    if include_first:
        return window_start

    return window_start + get_delta(1, data_frequency)


def get_period_label(dt, data_frequency):
    """
    Build the period label for the specified date and frequency.

    Parameters
    ----------
    dt: datetime
    data_frequency: str

    Returns
    -------
    str
        'YYYY-MM' (zero-padded month) when ``data_frequency`` is
        'minute', otherwise just the year.

    """
    if data_frequency != 'minute':
        return '{}'.format(dt.year)

    return '{}-{:02d}'.format(dt.year, dt.month)


def get_month_start_end(dt, first_day=None, last_day=None):
"""
The start and end of the month for the specified date.

Parameters
----------
dt: datetime
    Date whose year and month select the month.
first_day: datetime
    When truthy, returned as the month start instead of the
    computed value.
last_day: datetime
    When truthy, used as the month end instead of the computed value.

Returns
-------
datetime, datetime
    The month start and the month end, in that order.

"""
# monthrange yields (weekday of the 1st, number of days in the month);
# only the day count (index 1) is used below.
month_range = calendar.monthrange(dt.year, dt.month)

if first_day:
month_start = first_day
else:
# Default start: day 1 of the month at 00:00, made UTC-aware.
month_start = pd.to_datetime(datetime(
dt.year, dt.month, 1, 0, 0, 0, 0
), utc=True)

if last_day:
month_end = last_day
else:
# Default end: last day of the month at 23:59, made UTC-aware.
month_end = pd.to_datetime(datetime(
dt.year, dt.month, month_range[1], 23, 59, 0, 0
), utc=True)

# An end later than the current UTC time is replaced by the current
# UTC time floored to the day.
# NOTE(review): indentation is lost in this view, so it is unclear
# whether this clamp applies only to the computed default or also to a
# caller-supplied last_day -- confirm against the original file.
if month_end > pd.Timestamp.utcnow():
month_end = pd.Timestamp.utcnow().floor('1D')

return month_start, month_end


def get_year_start_end(dt, first_day=None, last_day=None):
    """
    The first and last day of the year for the specified date.

    Parameters
    ----------
    dt: datetime
        Date whose year selects the range.
    first_day: datetime
        When truthy, returned as the year start instead of January 1st.
    last_day: datetime
        When truthy, used as the year end instead of December 31st.

    Returns
    -------
    datetime, datetime
        The year start and the year end, in that order. An end later
        than the current UTC time is replaced by the current UTC time
        floored to the day.

    """
    year_start = first_day if first_day \
        else pd.to_datetime(date(dt.year, 1, 1), utc=True)
    year_end = last_day if last_day \
        else pd.to_datetime(date(dt.year, 12, 31), utc=True)

    # Read the clock once so the comparison and the clamp use the same
    # instant; Timestamp.now(tz='UTC') replaces the deprecated
    # Timestamp.utcnow().
    now = pd.Timestamp.now(tz='UTC')
    if year_end > now:
        year_end = now.floor('1D')

    return year_start, year_end


def get_df_from_arrays(arrays, periods):
"""
A DataFrame from the specified OHCLV arrays.
Expand Down
Loading

0 comments on commit 51126fd

Please sign in to comment.