diff --git a/featexp/base.py b/featexp/base.py index b4596f8..7952cc0 100644 --- a/featexp/base.py +++ b/featexp/base.py @@ -27,7 +27,7 @@ def get_grouped_data(input_data, feature, target_col, bins, cuts=0): reduced_cuts = 0 for i in range(1, bins + 1): next_cut = np.percentile(input_data[feature], i * 100 / bins) - if next_cut != prev_cut: + if next_cut > prev_cut + .000001: # float numbers should be compared with some threshold! cuts.append(next_cut) else: reduced_cuts = reduced_cuts + 1 @@ -265,4 +265,4 @@ def get_trend_stats(data, target_col, features_list=0, bins=10, data_test=0): print('Categorical features ' + str(ignored) + ' ignored. Categorical features not supported yet.') print('Returning stats for all numeric features') - return (stats_all_df) \ No newline at end of file + return (stats_all_df)