-
Notifications
You must be signed in to change notification settings - Fork 114
/
example_boston.py
43 lines (36 loc) · 1.54 KB
/
example_boston.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor,GradientBoostingClassifier
from rulefit import RuleFit
# Example script: fit a RuleFit model on the data in boston.csv, either as a
# regression on the `medv` column or as a binary classification of `medv`
# thresholded at 21, then print the rules that received a non-zero coefficient.

# Load the data; the first CSV column is used as the row index.
boston_data = pd.read_csv("boston.csv", index_col=0)

# `medv` is the target; every remaining column is a predictor.
y = boston_data.medv.values
X = boston_data.drop("medv", axis=1)
features = X.columns  # keep the column labels before dropping to a raw array
X = X.values

typ = 'classifier'  # 'regressor' or 'classifier'

# Settings shared by both modes; only `rfmode` differs between them, so they
# are defined once to keep the two configurations from drifting apart.
rulefit_params = dict(
    tree_size=4,
    sample_fract='default',
    max_rules=2000,
    memory_par=0.01,
    tree_generator=None,
    lin_trim_quantile=0.025,
    lin_standardise=True,
    exp_rand_tree_size=True,
    random_state=1,
)

if typ == 'regressor':
    rf = RuleFit(rfmode='regress', **rulefit_params)
    rf.fit(X, y, feature_names=features)
    y_pred = rf.predict(X)
    # In-sample root-mean-squared error (mean of squared residuals, then sqrt).
    insample_rmse = np.sqrt(np.mean((y_pred - y) ** 2))
elif typ == 'classifier':
    # Binarise the target: values below 21 become -1, the rest +1.
    # The two masked assignments are order-safe because -1 can never satisfy
    # the second condition (>= 21), so no element is relabelled twice.
    y_class = y.copy()
    y_class[y_class < 21] = -1
    y_class[y_class >= 21] = +1
    N = X.shape[0]  # number of samples
    rf = RuleFit(rfmode='classify', **rulefit_params)
    rf.fit(X, y_class, feature_names=features)
    y_pred = rf.predict(X)
    y_proba = rf.predict_proba(X)
    # In-sample accuracy: fraction of samples whose predicted label matches.
    insample_acc = np.sum(y_pred == y_class) / N
else:
    # Fail here with a clear message instead of hitting a NameError on `rf`
    # further down when neither branch has run.
    raise ValueError(
        "typ must be 'regressor' or 'classifier', got %r" % (typ,)
    )

# Keep only the terms with a non-zero coefficient, ordered by support.
rules = rf.get_rules()
rules = rules[rules.coef != 0].sort_values(by="support")

# Count how many retained terms are rules and how many are linear terms.
num_rules_rule = len(rules[rules["type"] == 'rule'])
num_rules_linear = len(rules[rules["type"] == 'linear'])

# Show the retained terms, most important first.
print(rules.sort_values('importance', ascending=False))