Add categorical accessor and move from enums to mappings
polakowo committed Jul 25, 2021
1 parent e1eabe7 commit 129deb6
Showing 38 changed files with 1,548 additions and 481 deletions.
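
For orientation, the new accessor added in this commit is reachable as df.vbt.cat, and plain mapping dicts take over the role previously played by enums. The snippet below is a minimal usage sketch inferred from the tests added here (tests/test_cat.py and tests/test_generic.py); the keyword arguments shown (mapping, column, group_by) are assumed from those test calls rather than from separate documentation.

import numpy as np
import pandas as pd
import vectorbt as vbt

# Same toy frame as in tests/test_cat.py below
df = pd.DataFrame({
    'a': [1, 2, 3, 4, 5],
    'b': [5, 4, 3, 2, 1],
    'c': [1, 2, 3, 2, 1]
})
group_by = np.array(['g1', 'g1', 'g2'])

# Map raw categorical codes to labels via a mapping dict (instead of an enum)
print(df['a'].vbt.value_counts(mapping={1: 'one', 2: 'two', 3: 'three', 4: 'four', 5: 'five'}))

# Per-column summary through the new categorical accessor
print(df.vbt.cat.stats(column='a'))

# Grouped summary, selecting a single group
print(df.vbt.cat(group_by=group_by).stats(column='g2'))
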
24 changes: 18 additions & 6 deletions tests/test_base.py
@@ -4,7 +4,7 @@
import pytest
from datetime import datetime

from vectorbt import settings
import vectorbt as vbt
from vectorbt.base import (
accessors,
array_wrapper,
@@ -22,9 +22,6 @@
except ImportError:
ray_available = False

settings.broadcasting['index_from'] = 'stack'
settings.broadcasting['columns_from'] = 'stack'

day_dt = np.timedelta64(86400000000000)

# Initialize global variables
@@ -57,6 +54,22 @@
multi_c = pd.MultiIndex.from_arrays([['a7', 'b7', 'c7'], ['a8', 'b8', 'c8']], names=['c7', 'c8'])
df5 = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=multi_i, columns=multi_c)


# ############# Global ############# #

def setup_module():
vbt.settings.numba['check_func_suffix'] = True
vbt.settings.broadcasting['index_from'] = 'stack'
vbt.settings.broadcasting['columns_from'] = 'stack'
vbt.settings.caching.enabled = False
vbt.settings.caching.whitelist = []
vbt.settings.caching.blacklist = []


def teardown_module():
vbt.settings.reset()


# ############# column_grouper.py ############# #


@@ -2205,7 +2218,6 @@ def test_flex(self, test_inputs):

called_dict = {}


PandasIndexer = indexing.PandasIndexer
ParamIndexer = indexing.build_param_indexer(['param1', 'param2', 'tuple'])

@@ -3166,7 +3178,7 @@ def combine_func3_nb(x, y):
columns=pd.Index(['a6', 'b6', 'c6'], dtype='object', name='c6')
)
)

target = pd.DataFrame(
np.array([
[232, 233, 234],
119 changes: 119 additions & 0 deletions tests/test_cat.py
@@ -0,0 +1,119 @@
import pandas as pd
import numpy as np
from datetime import datetime

import vectorbt as vbt

df = pd.DataFrame({
'a': [1, 2, 3, 4, 5],
'b': [5, 4, 3, 2, 1],
'c': [1, 2, 3, 2, 1]
}, index=pd.DatetimeIndex([
datetime(2018, 1, 1),
datetime(2018, 1, 2),
datetime(2018, 1, 3),
datetime(2018, 1, 4),
datetime(2018, 1, 5)
]))
group_by = np.array(['g1', 'g1', 'g2'])


# ############# Global ############# #

def setup_module():
vbt.settings.numba['check_func_suffix'] = True
vbt.settings.caching.enabled = False
vbt.settings.caching.whitelist = []
vbt.settings.caching.blacklist = []


def teardown_module():
vbt.settings.reset()


# ############# accessors.py ############# #


class TestAccessors:
def test_stats(self):
stat_index = pd.Index([
'Start', 'End', 'Period',
'Value Counts: 1',
'Value Counts: 2',
'Value Counts: 3',
'Value Counts: 4',
'Value Counts: 5'
], dtype='object')
pd.testing.assert_series_equal(
df.vbt.cat.stats(column='a'),
pd.Series([
pd.Timestamp('2018-01-01 00:00:00'),
pd.Timestamp('2018-01-05 00:00:00'),
pd.Timedelta('5 days 00:00:00'),
1, 1, 1, 1, 1
],
index=stat_index,
name='a'
)
)
pd.testing.assert_series_equal(
df.vbt.cat.stats(column='a', settings=dict(mapping={
1: 'test1',
2: 'test2',
3: 'test3',
4: 'test4',
5: 'test5'
})),
pd.Series([
pd.Timestamp('2018-01-01 00:00:00'),
pd.Timestamp('2018-01-05 00:00:00'),
pd.Timedelta('5 days 00:00:00'),
1, 1, 1, 1, 1
],
index=pd.Index([
'Start', 'End', 'Period',
'Value Counts: test1',
'Value Counts: test2',
'Value Counts: test3',
'Value Counts: test4',
'Value Counts: test5'
], dtype='object'),
name='a'
)
)
pd.testing.assert_series_equal(
df.vbt.cat['c'].stats(),
pd.Series([
pd.Timestamp('2018-01-01 00:00:00'),
pd.Timestamp('2018-01-05 00:00:00'),
pd.Timedelta('5 days 00:00:00'),
2, 2, 1
],
index=stat_index[:-2],
name='c'
)
)
pd.testing.assert_series_equal(
df.vbt.cat.stats(column='c'),
df.vbt.cat(group_by=group_by).stats(column='c', group_by=False)
)
pd.testing.assert_series_equal(
df.vbt.cat(group_by=group_by)['g2'].stats(),
pd.Series([
pd.Timestamp('2018-01-01 00:00:00'),
pd.Timestamp('2018-01-05 00:00:00'),
pd.Timedelta('5 days 00:00:00'),
2, 2, 1
],
index=stat_index[:-2],
name='g2'
)
)
pd.testing.assert_series_equal(
df.vbt.cat(group_by=group_by).stats(column='g2'),
df.vbt.cat.stats(column='g2', group_by=group_by)
)
stats_df = df.vbt.cat.stats(agg_func=None)
assert stats_df.shape == (3, 8)
pd.testing.assert_index_equal(stats_df.index, df.vbt.cat.wrapper.columns)
pd.testing.assert_index_equal(stats_df.columns, stat_index)
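
A note on the assertions above: stats() aggregates to a single Series for the selected column or group, while agg_func=None returns one row per column. A short hedged sketch, reusing the df defined at the top of this test module and assuming only the behaviour exercised here:

# Assumed from the test assertions in tests/test_cat.py above
stats_c = df.vbt.cat['c'].stats()            # Series named 'c', one entry per metric
stats_df = df.vbt.cat.stats(agg_func=None)   # DataFrame: one row per column, one column per metric
assert stats_df.shape == (3, 8)              # 3 columns in df, 8 stat metrics in total
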
13 changes: 13 additions & 0 deletions tests/test_data.py
@@ -9,6 +9,19 @@
seed = 42


# ############# Global ############# #

def setup_module():
vbt.settings.numba['check_func_suffix'] = True
vbt.settings.caching.enabled = False
vbt.settings.caching.whitelist = []
vbt.settings.caching.blacklist = []


def teardown_module():
vbt.settings.reset()


# ############# base.py ############# #


129 changes: 129 additions & 0 deletions tests/test_generic.py
@@ -57,6 +57,19 @@ def col_nanmean_nb(col, x):
return np.nanmean(x)


# ############# Global ############# #

def setup_module():
vbt.settings.numba['check_func_suffix'] = True
vbt.settings.caching.enabled = False
vbt.settings.caching.whitelist = []
vbt.settings.caching.blacklist = []


def teardown_module():
vbt.settings.reset()


# ############# accessors.py ############# #


@@ -752,6 +765,122 @@ def test_describe(self):
}, index=test_against.index)
)

def test_value_counts(self):
pd.testing.assert_series_equal(
df['a'].vbt.value_counts(),
pd.Series(
np.array([1, 1, 1, 1, 1]),
index=pd.Float64Index([1.0, 2.0, 3.0, 4.0, np.nan], dtype='float64'),
name='a'
)
)
mapping = {1.: 'one', 2.: 'two', 3.: 'three', 4.: 'four'}
pd.testing.assert_series_equal(
df['a'].vbt.value_counts(mapping=mapping),
pd.Series(
np.array([1, 1, 1, 1, 1]),
index=pd.Index(['one', 'two', 'three', 'four', None], dtype='object'),
name='a'
)
)
pd.testing.assert_frame_equal(
df.vbt.value_counts(),
pd.DataFrame(
np.array([
[1, 1, 2],
[1, 1, 2],
[1, 1, 0],
[1, 1, 0],
[1, 1, 1]
]),
index=pd.Float64Index([1.0, 2.0, 3.0, 4.0, np.nan], dtype='float64'),
columns=df.columns
)
)
pd.testing.assert_frame_equal(
df.vbt.value_counts(group_by=group_by),
pd.DataFrame(
np.array([
[2, 2],
[2, 2],
[2, 0],
[2, 0],
[2, 1]
]),
index=pd.Float64Index([1.0, 2.0, 3.0, 4.0, np.nan], dtype='float64'),
columns=pd.Index(['g1', 'g2'], dtype='object')
)
)
pd.testing.assert_frame_equal(
df.vbt.value_counts(sort_labels=False),
pd.DataFrame(
np.array([
[1, 1, 2],
[1, 1, 2],
[1, 1, 0],
[1, 1, 0],
[1, 1, 1]
]),
index=pd.Float64Index([1.0, 2.0, 4.0, 3.0, np.nan], dtype='float64'),
columns=df.columns
)
)
pd.testing.assert_frame_equal(
df.vbt.value_counts(sort=True),
pd.DataFrame(
np.array([
[1, 1, 2],
[1, 1, 2],
[1, 1, 1],
[1, 1, 0],
[1, 1, 0]
]),
index=pd.Float64Index([1.0, 2.0, np.nan, 3.0, 4.0], dtype='float64'),
columns=df.columns
)
)
pd.testing.assert_frame_equal(
df.vbt.value_counts(sort=True, ascending=True),
pd.DataFrame(
np.array([
[1, 1, 0],
[1, 1, 0],
[1, 1, 1],
[1, 1, 2],
[1, 1, 2]
]),
index=pd.Float64Index([3.0, 4.0, np.nan, 1.0, 2.0], dtype='float64'),
columns=df.columns
)
)
pd.testing.assert_frame_equal(
df.vbt.value_counts(sort=True, normalize=True),
pd.DataFrame(
np.array([
[0.06666666666666667, 0.06666666666666667, 0.13333333333333333],
[0.06666666666666667, 0.06666666666666667, 0.13333333333333333],
[0.06666666666666667, 0.06666666666666667, 0.06666666666666667],
[0.06666666666666667, 0.06666666666666667, 0.0],
[0.06666666666666667, 0.06666666666666667, 0.0]
]),
index=pd.Float64Index([1.0, 2.0, np.nan, 3.0, 4.0], dtype='float64'),
columns=df.columns
)
)
pd.testing.assert_frame_equal(
df.vbt.value_counts(sort=True, normalize=True, dropna=True),
pd.DataFrame(
np.array([
[0.08333333333333333, 0.08333333333333333, 0.16666666666666666],
[0.08333333333333333, 0.08333333333333333, 0.16666666666666666],
[0.08333333333333333, 0.08333333333333333, 0.0],
[0.08333333333333333, 0.08333333333333333, 0.0]
]),
index=pd.Float64Index([1.0, 2.0, 3.0, 4.0], dtype='float64'),
columns=df.columns
)
)

def test_drawdown(self):
pd.testing.assert_series_equal(
df['a'].vbt.drawdown(),
(Diffs for the remaining 34 changed files are not shown.)
