Skip to content

Commit

Permalink
v1.6.11-rc0 (#345)
Browse files Browse the repository at this point in the history
Co-authored-by: rtosholdings-bot <[email protected]>
  • Loading branch information
OrestZborowski-SIG and rtosholdings-bot authored Apr 18, 2023
1 parent b7715da commit 1a06bcb
Show file tree
Hide file tree
Showing 6 changed files with 538 additions and 20 deletions.
10 changes: 7 additions & 3 deletions riptable/rt_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1531,6 +1531,7 @@ def __new__(
grouping = None

# prepare to eliminate sort_gb
arg_sort_gb = sort_gb
if sort_display is not None:
sort_gb = sort_display

Expand All @@ -1547,10 +1548,12 @@ def __new__(
_lex = lex

# default to bytestrings - more performant, less memory
arg_unicode = unicode
if unicode is None:
unicode = False

# default to 1-based indexing (filtering, etc. fully supported in this mode)
arg_base_index = base_index
if base_index is None:
base_index = 1

Expand All @@ -1574,16 +1577,17 @@ def __new__(
# ordered = True

# from categorical, deep copy - send to regular categorical.copy() to correctly preserve attributes
# use original arguments rather than defaulted ones to avoid warnings in copy().
if isinstance(values, Categorical):
return values.copy(
categories=categories, # main data
ordered=ordered,
sort_gb=sort_gb,
sort_gb=arg_sort_gb,
lex=lex, # sorting/hashing
base_index=base_index,
base_index=arg_base_index,
filter=filter, # priority options
dtype=dtype,
unicode=unicode,
unicode=arg_unicode,
invalid=invalid,
auto_add=auto_add, # misc options
from_matlab=from_matlab,
Expand Down
231 changes: 228 additions & 3 deletions riptable/rt_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@
"strptime_to_nano",
]

import time
import warnings
from datetime import date
from datetime import datetime as dt
from datetime import timezone
import math
import time
from typing import TYPE_CHECKING, List, Optional, Tuple, Union
import warnings

import numpy as np
import riptide_cpp as rc
Expand Down Expand Up @@ -1543,6 +1544,94 @@ def isnotnan(self):
"""
return ~self.isnan()

# ------------------------------------------------------------
def isfinite(self):
    """
    Flag every `Date` element that holds a valid (non-NaN) value.

    An element counts as NaN when it is either the DateTime NaN (0) or
    Riptable's int32 sentinel value. The result is the elementwise
    inverse of :meth:`isnan`.

    Returns
    -------
    `FastArray`
        Boolean `FastArray`: True where the element is not NaN, False
        where it is.

    See Also
    --------
    Date.isnan, DateTimeNano.isnan, DateTimeNano.isnotnan, riptable.isnan,
    riptable.isnotnan, riptable.isnanorzero, FastArray.isnan,
    FastArray.isnotnan, FastArray.notna, FastArray.isnanorzero,
    Categorical.isnan, Categorical.isnotnan, Categorical.notna
    Dataset.mask_or_isnan :
        Return a boolean array that's True for each `Dataset` row that
        contains at least one NaN.
    Dataset.mask_and_isnan :
        Return a boolean array that's True for each all-NaN `Dataset` row.

    Notes
    -----
    Riptable currently uses 0 for the DateTime NaN value. This constant is
    held in the `DateTimeBase` class.

    Examples
    --------
    >>> d = rt.Date.range('20190201', days = 3, step = 2)
    >>> d[0] = 0
    >>> d[1] = d.inv
    >>> d
    Date(['Inv', 'Inv', '2019-02-05'])
    >>> d.isfinite()
    FastArray([False, False, True])
    """
    # Build the NaN mask first, then flip it to obtain the "finite" mask.
    nan_mask = self.isnan()
    return ~nan_mask

# ------------------------------------------------------------
def isnotfinite(self):
    """
    Flag every `Date` element that holds a NaN (invalid) value.

    An element counts as NaN when it is either the DateTime NaN (0) or
    Riptable's int32 sentinel value. The check is delegated to
    ``isnanorzero()`` on the underlying ``_fa`` array.

    Returns
    -------
    `FastArray`
        Boolean `FastArray`: True where the element is NaN, False
        otherwise.

    See Also
    --------
    Date.isnotnan, DateTimeNano.isnan, DateTimeNano.isnotnan, riptable.isnan,
    riptable.isnotnan, riptable.isnanorzero, FastArray.isnan,
    FastArray.isnotnan, FastArray.notna, FastArray.isnanorzero,
    Categorical.isnan, Categorical.isnotnan, Categorical.notna
    Dataset.mask_or_isnan :
        Return a boolean array that's True for each `Dataset` row that
        contains at least one NaN.
    Dataset.mask_and_isnan :
        Return a boolean array that's True for each all-NaN `Dataset` row.

    Notes
    -----
    Riptable currently uses 0 for the DateTime NaN value. This constant is
    held in the `DateTimeBase` class.

    Examples
    --------
    >>> d = rt.Date.range('20190201', days = 3, step = 2)
    >>> d[0] = 0
    >>> d[1] = d.inv
    >>> d
    Date(['Inv', 'Inv', '2019-02-05'])
    >>> d.isnotfinite()
    FastArray([ True, True, False])
    """
    # Both zero and the sentinel are invalid, so one isnanorzero() pass suffices.
    underlying = self._fa
    return underlying.isnanorzero()

# ------------------------------------------------------------
@property
def yyyymmdd(self):
Expand Down Expand Up @@ -2569,7 +2658,7 @@ class DateSpan(DateBase):
"version": 0, # if no version, assume before versions implemented
"instance_vars": {"_display_length": DisplayLength.Long},
}
NAN_DATE = INVALID_DICT[7] # int32 sentinel
NAN_DATE = INVALID_DICT[np.dtype(np.int32).num] # int32 sentinel
forbidden_mathops = ()

def __new__(cls, arr, unit=None):
Expand Down Expand Up @@ -5084,6 +5173,96 @@ def isnotnan(self):
"""
return ~self.isnan()

# -------------------------------------------------------------
def isfinite(self):
    """
    Flag every `DateTimeNano` element that holds a valid (non-NaN) value.

    An element counts as NaN when it is either the DateTime NaN (0) or
    Riptable's int64 sentinel value. The result is the elementwise
    inverse of :meth:`isnan`.

    Returns
    -------
    `FastArray`
        Boolean `FastArray`: True where the element is not NaN, False
        where it is.

    See Also
    --------
    DateTimeNano.isnan, Date.isnan, Date.isnotnan, riptable.isnan,
    riptable.isnotnan, riptable.isnanorzero, FastArray.isnan,
    FastArray.isnotnan, FastArray.notna, FastArray.isnanorzero,
    Categorical.isnan, Categorical.isnotnan, Categorical.notna
    Dataset.mask_or_isnan :
        Return a boolean array that's True for each `Dataset` row that
        contains at least one NaN.
    Dataset.mask_and_isnan :
        Return a boolean array that's True for each all-NaN `Dataset` row.

    Notes
    -----
    Riptable currently uses 0 for the DateTime NaN value. This constant is
    held in the `DateTimeBase` class.

    Examples
    --------
    >>> dtn = rt.DateTimeNano(['20210101 09:31:15', '20210519 05:21:17',
    ...                        '20210713 02:44:19'], from_tz = 'NYC')
    >>> dtn[0] = 0
    >>> dtn[1] = dtn.inv
    >>> dtn
    DateTimeNano(['Inv', 'Inv', '20210712 22:44:19.000000000'], to_tz='NYC')
    >>> dtn.isfinite()
    FastArray([False, False, True])
    """
    # Build the NaN mask first, then flip it to obtain the "finite" mask.
    nan_mask = self.isnan()
    return ~nan_mask

# -------------------------------------------------------------
def isnotfinite(self):
    """
    Flag every `DateTimeNano` element that holds a NaN (invalid) value.

    An element counts as NaN when it is either the DateTime NaN (0) or
    Riptable's int64 sentinel value. The check is delegated to
    ``isnanorzero()`` on the underlying ``_fa`` array.

    Returns
    -------
    `FastArray`
        Boolean `FastArray`: True where the element is NaN, False
        otherwise.

    See Also
    --------
    DateTimeNano.isnotnan, Date.isnan, Date.isnotnan, riptable.isnan,
    riptable.isnotnan, riptable.isnanorzero, FastArray.isnan,
    FastArray.isnotnan, FastArray.notna, FastArray.isnanorzero,
    Categorical.isnan, Categorical.isnotnan, Categorical.notna
    Dataset.mask_or_isnan :
        Return a boolean array that's True for each `Dataset` row that
        contains at least one NaN.
    Dataset.mask_and_isnan :
        Return a boolean array that's True for each all-NaN `Dataset` row.

    Notes
    -----
    Riptable currently uses 0 for the DateTime NaN value. This constant is
    held in the `DateTimeBase` class.

    Examples
    --------
    >>> dtn = rt.DateTimeNano(['20210101 09:31:15', '20210519 05:21:17',
    ...                        '20210713 02:44:19'], from_tz = 'NYC')
    >>> dtn[0] = 0
    >>> dtn[1] = dtn.inv
    >>> dtn
    DateTimeNano(['Inv', 'Inv', '20210712 22:44:19.000000000'], to_tz='NYC')
    >>> dtn.isnotfinite()
    FastArray([ True, True, False])
    """
    # Both zero and the sentinel are invalid, so one isnanorzero() pass suffices.
    underlying = self._fa
    return underlying.isnanorzero()

# -------------------------------------------------------------
def _datetimenano_compare_check(self, funcname, other):
caller = self._fa
Expand Down Expand Up @@ -6986,6 +7165,8 @@ class DateSpanScalar(np.int32):

__slots__ = "_display_length"

NAN_DATESPANSCALAR = INVALID_DICT[np.dtype(np.int32).num] # int32 sentinel

# ------------------------------------------------------------
def __new__(cls, arr, **kwargs):
return super().__new__(cls, arr)
Expand Down Expand Up @@ -7023,6 +7204,22 @@ def __str__(self):
def _np(self):
return self.view(np.int32)

# ------------------------------------------------------------
def isnan(self):
    """
    Report whether this scalar is the invalid date-span value.

    Returns
    -------
    bool-like
        Result of comparing this scalar for equality with
        ``DateSpanScalar.NAN_DATESPANSCALAR``.
    """
    sentinel = DateSpanScalar.NAN_DATESPANSCALAR
    return self == sentinel

# ------------------------------------------------------------
def isnotnan(self):
    """
    Report whether this scalar is anything other than the invalid
    date-span value.

    Returns
    -------
    bool-like
        Result of comparing this scalar for inequality with
        ``DateSpanScalar.NAN_DATESPANSCALAR``.
    """
    sentinel = DateSpanScalar.NAN_DATESPANSCALAR
    return self != sentinel

# ------------------------------------------------------------
def isfinite(self):
    """
    Report whether this scalar holds a valid date span.

    For this type "finite" uses the same test as :meth:`isnotnan`: the
    value must differ from ``DateSpanScalar.NAN_DATESPANSCALAR``.

    Returns
    -------
    bool-like
        Result of the inequality comparison against the sentinel.
    """
    sentinel = DateSpanScalar.NAN_DATESPANSCALAR
    return self != sentinel

# ------------------------------------------------------------
def isnotfinite(self):
    """
    Report whether this scalar is the invalid date-span value.

    For this type "not finite" uses the same test as :meth:`isnan`: the
    value must equal ``DateSpanScalar.NAN_DATESPANSCALAR``.

    Returns
    -------
    bool-like
        Result of the equality comparison against the sentinel.
    """
    sentinel = DateSpanScalar.NAN_DATESPANSCALAR
    return self == sentinel

# ------------------------------------------------------------
@property
def _fa(self):
Expand Down Expand Up @@ -7133,6 +7330,18 @@ def strftime(self, format):
def isnan(self):
    """
    Report whether this scalar is treated as NaN.

    Any value that is zero or negative is considered NaN.

    Returns
    -------
    bool-like
        Result of ``self <= 0``.
    """
    is_invalid = self <= 0
    return is_invalid

# ------------------------------------------------------------
def isnotnan(self):
    """
    Report whether this scalar is a valid (non-NaN) value.

    Only strictly positive values are considered valid.

    Returns
    -------
    bool-like
        Result of ``self > 0``.
    """
    is_valid = self > 0
    return is_valid

# ------------------------------------------------------------
def isfinite(self):
    """
    Report whether this scalar is finite.

    Uses the same test as :meth:`isnotnan`: the value must be strictly
    positive.

    Returns
    -------
    bool-like
        Result of ``self > 0``.
    """
    is_valid = self > 0
    return is_valid

# ------------------------------------------------------------
def isnotfinite(self):
    """
    Report whether this scalar is not finite.

    Uses the same test as :meth:`isnan`: zero and negative values are
    flagged.

    Returns
    -------
    bool-like
        Result of ``self <= 0``.
    """
    is_invalid = self <= 0
    return is_invalid

# ------------------------------------------------------------
@property
def _np(self):
Expand Down Expand Up @@ -7262,6 +7471,22 @@ def strftime(self, format):
"""
return self._strftime(format)

# ------------------------------------------------------------
def isnan(self):
    """
    Report whether this scalar is a floating-point NaN.

    Returns
    -------
    bool
        The value of ``math.isnan`` applied to this scalar.
    """
    nan_flag = math.isnan(self)
    return nan_flag

# ------------------------------------------------------------
def isnotnan(self):
    """
    Report whether this scalar is anything other than a floating-point NaN.

    Returns
    -------
    bool
        False when ``math.isnan`` flags this scalar, True otherwise.
    """
    if math.isnan(self):
        return False
    return True

# ------------------------------------------------------------
def isfinite(self):
    """
    Report whether this scalar is a finite number.

    Returns
    -------
    bool
        The value of ``math.isfinite`` applied to this scalar, so both
        NaN and the infinities yield False.
    """
    finite_flag = math.isfinite(self)
    return finite_flag

# ------------------------------------------------------------
def isnotfinite(self):
    """
    Report whether this scalar is NaN or infinite.

    Returns
    -------
    bool
        False when ``math.isfinite`` accepts this scalar, True otherwise.
    """
    if math.isfinite(self):
        return False
    return True

# ------------------------------------------------------------
def get_classname(self):
return __class__.__name__
Expand Down
41 changes: 29 additions & 12 deletions riptable/rt_groupbyops.py
Original file line number Diff line number Diff line change
Expand Up @@ -957,6 +957,7 @@ def null(self, showfilter=False):
TypeRegister.Dataset({}), self.gb_keychain, None, addkeys=True, showfilter=showfilter
)

_USE_FAST_COUNT_UNIQUES = False # re-enable/delete slow once riptable#209 is fixed.
# ---------------------------------------------------------------
def count_uniques(self, *args, **kwargs):
"""
Expand Down Expand Up @@ -989,19 +990,35 @@ def count_uniques(self, *args, **kwargs):
label_keys = self.gb_keychain
g = self.grouping

filter = kwargs["filter"] if "filter" in kwargs else None
transform = kwargs["transform"] if "transform" in kwargs else None
if GroupByOps._USE_FAST_COUNT_UNIQUES:
filter = kwargs["filter"] if "filter" in kwargs else None
transform = kwargs["transform"] if "transform" in kwargs else None

newdict = {}
for colname, arr in origdict.items():
gbk = label_keys.gbkeys
if colname not in gbk:
mcat = TypeRegister.Categorical([self, arr], filter=filter)
if transform:
result = self.nansum(mcat.first_bool, transform=transform)[0]
else:
result = mcat.null()[0].count().Count
newdict[colname] = result

else: # slow code
# get way to make groups contiguous
igroup = g.igroup
cutoffs = g.ncountgroup.cumsum(dtype=np.int64)[1:]
newdict = {}
for colname, arr in origdict.items():
gbk = label_keys.gbkeys
if colname not in gbk:
ifirstkey = groupbyhash(arr[igroup], cutoffs=cutoffs)["iFirstKey"][1]
# the cutoffs will generate iFirstKey cutoffs that help us determine the unique counts
result = ifirstkey.diff()
result[0] = ifirstkey[0]
newdict[colname] = result

newdict = {}
for colname, arr in origdict.items():
gbk = label_keys.gbkeys
if colname not in gbk:
mcat = TypeRegister.Categorical([self, arr], filter=filter)
if transform:
result = self.nansum(mcat.first_bool, transform=transform)[0]
else:
result = mcat.null()[0].count().Count
newdict[colname] = result
return g._finalize_dataset(newdict, label_keys, None, addkeys=True, **kwargs)

def _gb_keyword_wrapper(
Expand Down
Loading

0 comments on commit 1a06bcb

Please sign in to comment.