-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathExample.py
53 lines (41 loc) · 1.42 KB
/
Example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# -*- coding: utf-8 -*-
import warnings
warnings.filterwarnings('ignore')
from pysmatch.Matcher import Matcher
import pandas as pd
import numpy as np
# Example script: load the loan data set, build a test/control split from the
# `loan_status` column, and run it through pysmatch's Matcher end to end.
print('get data')
path = "misc/loan.csv"
data = pd.read_csv(path)
print('get data')

# Split the rows into the two groups handed to the Matcher. Explicit copies
# are taken because the label column is overwritten right below: assigning
# into a filtered selection of `data` is chained assignment, which pandas
# reports via SettingWithCopyWarning (hidden here by the blanket warnings
# filter at the top of the file).
test = data[data.loan_status == "Default"].copy()
control = data[data.loan_status == "Fully Paid"].copy()

# Recode the outcome column as a binary label: 1 for the test group,
# 0 for the control group.
test['loan_status'] = 1
control['loan_status'] = 0

m = Matcher(test, control, yvar="loan_status", exclude=[])

# Seed NumPy's global RNG before fitting.
# NOTE(review): presumably this is what makes the fit/match steps
# reproducible — confirm that pysmatch draws from NumPy's global RNG.
np.random.seed(20240919)

# ============ (1) Normal train (without optuna) =============
# Alternative model types, kept for reference:
# m.fit_scores(balance=True, nmodels=10, n_jobs=3, model_type='knn')
# m.fit_scores(balance=True, nmodels=10, n_jobs=3, model_type='tree', max_iter=100)
m.fit_scores(balance=True, nmodels=10, n_jobs=3, model_type='linear', max_iter=200)

# ============ (2) Utilize optuna (only trains one best model) =============
# m.fit_scores(
# balance=True,
# model_type='tree',
# max_iter=200,
# use_optuna=True,
# n_trials=15
# )

# Score the data with the fitted model(s) and plot the scores.
m.predict_scores()
m.plot_scores()

m.tune_threshold(method='random')

# NOTE(review): threshold=1 looks very permissive if the scores lie in
# [0, 1] — confirm this is the intended value.
m.match(method="min", nmatches=1, threshold=1, replacement=False)
m.plot_matched_scores()

freq_df = m.record_frequency()
m.assign_weight_vector()

print("top 6 matched data")
print(m.matched_data.sort_values("match_id").head(6))

# Compare the groups on categorical and continuous variables, requesting both
# the result table and the plot from each call.
categorical_results = m.compare_categorical(return_table=True, plot_result=True)
print(categorical_results)
cc = m.compare_continuous(return_table=True, plot_result=True)
print(cc)