Skip to content

Commit

Permalink
fill_nan(None) 严重拖慢速度,所以还是由用户自己处理更合适
Browse files Browse the repository at this point in the history
  • Loading branch information
wukan1986 committed Mar 5, 2024
1 parent e167fe4 commit a222cf3
Showing 1 changed file with 1 addition and 8 deletions.
9 changes: 1 addition & 8 deletions polars_ta/wq/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,41 +7,36 @@


def cs_standardize_zscore(x: Expr, ddof: int = 0) -> Expr:
    """Standardize ``x`` to z-scores: ``(x - mean) / std``.

    Args:
        x: Expression to standardize.
        ddof: Delta degrees of freedom forwarded to ``std``.

    Returns:
        Expression holding the standardized values.
    """
    # NaN entries are first replaced through ``fill_nan(None)``, so the
    # mean and std below are computed on the cleaned expression.
    cleaned = x.fill_nan(None)
    centered = cleaned - cleaned.mean()
    scale = cleaned.std(ddof=ddof)
    return centered / scale


def cs_standardize_minmax(x: Expr) -> Expr:
    """Rescale ``x`` by its min and max: ``(x - min) / (max - min + eps)``.

    Args:
        x: Expression to rescale.

    Returns:
        Expression holding the rescaled values.
    """
    # NaN entries are replaced through ``fill_nan(None)`` before min/max.
    cleaned = x.fill_nan(None)
    lowest = cleaned.min()
    # TA_EPSILON is added to the range; presumably it guards against a
    # zero denominator when max == min — confirm against its definition.
    value_range = cleaned.max() - lowest + TA_EPSILON
    return (cleaned - lowest) / value_range


def cs_winsorize_quantile(x: Expr, low_limit: float = 0.025, up_limit: float = 0.995) -> Expr:
    """Clip ``x`` to the range spanned by two of its own quantiles.

    Args:
        x: Expression to winsorize.
        low_limit: Quantile used as the lower clipping bound.
        up_limit: Quantile used as the upper clipping bound.

    Returns:
        Expression with values clipped to ``[q(low_limit), q(up_limit)]``.
    """
    # NaN entries are replaced through ``fill_nan(None)`` before the
    # quantiles are taken.
    cleaned = x.fill_nan(None)
    return cleaned.clip(
        lower_bound=cleaned.quantile(low_limit),
        upper_bound=cleaned.quantile(up_limit),
    )


def cs_winsorize_3sigma(x: Expr, n: float = 3.) -> Expr:
    """Clip ``x`` to ``mean ± n * std`` (population std, ``ddof=0``).

    Args:
        x: Expression to winsorize.
        n: Number of standard deviations kept on each side of the mean.

    Returns:
        Expression with values clipped to ``[mean - n*std, mean + n*std]``.
    """
    # NOTE(review): the original comment here says that fill_nan(None)
    # severely slows things down, so NaN handling is better left to the
    # caller. The call is still present in this listing — confirm whether
    # it is meant to stay.
    cleaned = x.fill_nan(None)
    center = cleaned.mean()
    half_width = n * cleaned.std(ddof=0)
    return cleaned.clip(
        lower_bound=center - half_width,
        upper_bound=center + half_width,
    )


def cs_winsorize_mad(x: Expr, n: float = 3., k: float = 1.4826) -> Expr:
    """Clip ``x`` to ``median ± n * k * MAD``.

    MAD is the median of the absolute deviations from the median; see
    https://en.wikipedia.org/wiki/Median_absolute_deviation

    Args:
        x: Expression to winsorize.
        n: Multiplier for the scaled MAD on each side of the median.
        k: Scale factor applied to the MAD (see the linked article for
            the usual choice of this constant).

    Returns:
        Expression with values clipped to
        ``[median - n*k*MAD, median + n*k*MAD]``.
    """
    # NaN entries are replaced through ``fill_nan(None)`` before the
    # medians are taken.
    cleaned = x.fill_nan(None)
    center = cleaned.median()
    abs_deviation = (cleaned - center).abs()
    half_width = (n * k) * abs_deviation.median()
    return cleaned.clip(
        lower_bound=center - half_width,
        upper_bound=center + half_width,
    )


def cs_neutralize_demean(x: Expr) -> Expr:
    """Subtract the mean of ``x`` from every value.

    Args:
        x: Expression to demean.

    Returns:
        Expression holding ``x - mean(x)``.
    """
    # NaN entries are replaced through ``fill_nan(None)`` before the mean.
    cleaned = x.fill_nan(None)
    average = cleaned.mean()
    return cleaned - average


Expand All @@ -52,8 +47,6 @@ def cs_neutralize_residual_simple(y: Expr, x: Expr) -> Expr:
# e_i = y_i - a - bx_i
# = y_i - ȳ + bx̄ - bx_i
# = y_i - ȳ - b(x_i - x̄)
x = x.fill_nan(None)
y = y.fill_nan(None)
x_demeaned = x - x.mean()
y_demeaned = y - y.mean()
x_demeaned_squared = x_demeaned.pow(2)
Expand Down

0 comments on commit a222cf3

Please sign in to comment.