forked from acredjb/FBP
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFBP_ML_XGBRegressor.py
115 lines (102 loc) · 3.99 KB
/
FBP_ML_XGBRegressor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Author: Daijingbo
# @Date : 2019/5/27
# @Desc :FBP ML
# http://www.captainbed.net/blog-acredjb
import pandas as pd
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn import preprocessing
import numpy as np
from xgboost import plot_importance
from sklearn.preprocessing import Imputer
# from sklearn.cross_validation import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
# Columns copied into every feature row unchanged, in this order.
_RAW_COLUMNS = ['Oddset', 'li', 'bet365', 'interw', 'wl', 'w', 'ao']
# Columns whose missing values are filled with the mean of the same row;
# they follow the raw columns in every feature row, in this order.
_IMPUTED_COLUMNS = ['10bet', 'jbb', 'ms', 'ysb', 'Pinnacle', 'SNAI']


def _row_mean_impute(values):
    """Return a 2-D float array with each NaN replaced by its row's mean.

    The mean is taken over the non-missing entries of the same row, which is
    what the previous ``Imputer(strategy='mean', axis=1)`` call computed.

    Raises:
        ValueError: if a row has no observed value at all, so that no mean
            can be computed for it.
    """
    values = np.asarray(values, dtype=float)
    missing = np.isnan(values)
    if not missing.any():
        return values
    empty_rows = np.flatnonzero(missing.all(axis=1))
    if empty_rows.size:
        raise ValueError(
            "Some rows only contain missing values: %s" % empty_rows)
    # Sum only the observed entries, then divide by how many were observed.
    observed_sum = np.where(missing, 0.0, values).sum(axis=1)
    row_means = observed_sum / (~missing).sum(axis=1)
    return np.where(missing, row_means[:, np.newaxis], values)


def _build_feature_rows(data):
    """Build the 13-value feature rows shared by training and prediction."""
    raw = np.asarray(data.loc[:, _RAW_COLUMNS], dtype=float)
    filled = _row_mean_impute(data.loc[:, _IMPUTED_COLUMNS])
    return np.hstack([raw, filled]).tolist()


def featureSet(data):
    """Split a training frame into feature rows and target values.

    Each feature row holds the seven ``_RAW_COLUMNS`` values followed by the
    six ``_IMPUTED_COLUMNS`` values, where gaps in the latter are filled with
    the mean of the remaining imputed columns of that row. The work is done
    with NumPy on whole columns, so it does not need sklearn's ``Imputer``
    class (which newer scikit-learn releases no longer provide).

    Args:
        data: pandas DataFrame containing all feature columns and a ``y``
            column.

    Returns:
        A tuple ``(XList, yList)``: ``XList`` is a list with one list of 13
        floats per input row (empty for an empty frame) and ``yList`` is the
        ``y`` column as a NumPy array.

    Raises:
        KeyError: if a required column is missing.
        ValueError: if some row has no value in any of the imputed columns.
    """
    return _build_feature_rows(data), data['y'].values


def loadTestData(filePath):
    """Read a prediction CSV and return its feature rows.

    Args:
        filePath: path or file-like object accepted by ``pandas.read_csv``.
            The file must contain the same feature columns as the training
            data; no ``y`` column is required.

    Returns:
        A list with one list of 13 floats per CSV row, laid out exactly like
        the rows returned by ``featureSet``.

    Raises:
        KeyError: if a required column is missing.
        ValueError: if some row has no value in any of the imputed columns.
    """
    data = pd.read_csv(filepath_or_buffer=filePath)
    return _build_feature_rows(data)
def trainandTest(X_train, y_train, X_test, start_id=5709,
                 output_path='FBP_submit.csv'):
    """Train an XGBoost regressor, predict on X_test and save the result.

    The predictions are printed one per line, written to a two-column CSV
    (``id``, ``y``) and the model's feature-importance plot is shown.

    Args:
        X_train: training feature rows.
        y_train: training target values, one per row of ``X_train``.
        X_test: feature rows to predict.
        start_id: id assigned to the first prediction; following predictions
            get consecutive ids. Defaults to 5709, the first id of the
            previously hard-coded range.
        output_path: destination of the CSV file. Defaults to
            ``'FBP_submit.csv'`` in the current working directory.

    Returns:
        None.
    """
    # Training step.
    # NOTE(review): newer xgboost releases replaced `silent` with
    # `verbosity` -- confirm against the installed version before changing.
    model = xgb.XGBRegressor(max_depth=2, learning_rate=0.01, n_estimators=500, silent=False, objective='reg:gamma')
    model.fit(X_train, y_train)

    # Predict the test set and echo every prediction.
    predictions = model.predict(X_test)
    for value in predictions:
        print(value)

    # One id per prediction. Sizing the range from the prediction count means
    # a test set larger than the old fixed 5709..6107 range no longer fails
    # with an IndexError.
    ids = np.arange(start_id, start_id + len(predictions))

    # Build the frame column by column so the ids stay integers; packing ids
    # and predictions into one NumPy array first would turn the ids into
    # floats ("5709.0") in the CSV.
    submission = pd.DataFrame(
        {'id': ids, 'y': np.asarray(predictions, dtype=np.float64)},
        columns=['id', 'y'],
    )
    # Write the submission file without the DataFrame index.
    submission.to_csv(output_path, index=False)

    # Show which features the model relied on most.
    plot_importance(model)
    plt.show()
if __name__ == '__main__':
    import sys

    # Default input locations. Either can be overridden on the command line:
    #   python FBP_ML_XGBRegressor.py [train_csv [predict_csv]]
    trainFilePath = 'E:/PythonLearn/pc_ex/AdaBoost_PeiLv/FBP_train.csv'
    testFilePath = 'E:/PythonLearn/pc_ex/AdaBoost_PeiLv/FBP_predict.csv'
    cli_paths = sys.argv[1:]
    if len(cli_paths) >= 1:
        trainFilePath = cli_paths[0]
    if len(cli_paths) >= 2:
        testFilePath = cli_paths[1]

    # Load the training data, build both feature sets, then train and predict.
    data = pd.read_csv(trainFilePath)
    X_train, y_train = featureSet(data)
    X_test = loadTestData(testFilePath)
    trainandTest(X_train, y_train, X_test)