-
Notifications
You must be signed in to change notification settings - Fork 27
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
390 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,390 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"import numpy as np\n", | ||
"import xgboost as xgb\n", | ||
"from sklearn.model_selection import StratifiedKFold, KFold\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"from sklearn import preprocessing\n", | ||
"import warnings\n", | ||
"warnings.filterwarnings('ignore')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"transaction_history = pd.read_csv('data/FT_Camp_2/sz_detail.csv')\n", | ||
"sz_id_inf = pd.read_csv('data/FT_Camp_2/trx_cod.csv')\n", | ||
"g2_cod_inf = pd.read_csv('data/FT_Camp_2/g2.csv')\n", | ||
"cuts_inf = pd.read_csv('data/FT_Camp_2/cust_bas_inf.csv')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"## 用户信息, 性别转类别特征,划分年龄区间\n", | ||
"cuts_inf['gender'] = cuts_inf['gender'].astype('category').cat.codes\n", | ||
"cuts_inf['age'] = cuts_inf.age.apply(lambda x: int(x) if x not in ['3', '\\\\N'] else np.nan)\n", | ||
"cuts_inf['age_range'] = pd.cut(cuts_inf['age'], [0, 19, 29, 39, 49, 59, 100], labels=False).astype('category').cat.codes\n", | ||
"cuts_inf['aum227'] = cuts_inf.aum227.apply(lambda x: float(x) if x != '\\\\N' else np.nan)\n", | ||
"cuts_inf['aum306'] = cuts_inf.aum306.apply(lambda x: float(x) if x != '\\\\N' else np.nan)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"## 收支分类 转类别特征\n", | ||
"sz_id_inf['cat1'] = sz_id_inf['cat1'].astype('category').cat.codes" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"## sz_id g2 类别编码 train test 保持一致的编码\n", | ||
"transaction_history = transaction_history.sort_values(['id', 'prt_dt']).reset_index(drop=True)\n", | ||
"transaction_history['g2_cod'] = transaction_history['g2_cod'].fillna('-1')\n", | ||
"sz_le = preprocessing.LabelEncoder()\n", | ||
"sz_le.fit(transaction_history.sz_id.values)\n", | ||
"g2_le = preprocessing.LabelEncoder()\n", | ||
"g2_le.fit(transaction_history.g2_cod.values)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"### 特征提取函数\n", | ||
"def get_feature(end_time, data_name):\n", | ||
" if data_name == 'train':\n", | ||
" data = pd.read_csv('data/FT_Camp_2/train.csv')\n", | ||
" else:\n", | ||
" data = pd.read_csv('data/FT_Camp_2/pred_users.csv')\n", | ||
" \n", | ||
" transaction_history_data = transaction_history[(transaction_history['prt_dt']<=end_time)].reset_index(drop=True)\n", | ||
"\n", | ||
" ## 将一些辅助类别信息补充到交易信息表里\n", | ||
" transaction_history_data = pd.merge(transaction_history_data, cuts_inf[['id', 'gender', 'age', 'age_range']], on=['id'], how='left')\n", | ||
" transaction_history_data = pd.merge(transaction_history_data, sz_id_inf[['sz_id', 'cat1']], on=['sz_id'], how='left')\n", | ||
"\n", | ||
" transaction_history_data['sz_id'] = sz_le.transform(transaction_history_data['sz_id'].values)\n", | ||
" transaction_history_data['g2_cod'] = g2_le.transform(transaction_history_data['g2_cod'].values)\n", | ||
"\n", | ||
" transaction_history_data = transaction_history_data[['id', 'sz_id', 'cat1', 'g2_cod', 'gender', 'age', 'age_range', 'rmb_amt','prt_dt']]\n", | ||
"\n", | ||
" ## 训练集用户属性 信息\n", | ||
" data = pd.merge(data, cuts_inf[['id', 'gender', 'age', 'age_range']], on=['id'], how='left')\n", | ||
"\n", | ||
" ## 训练集用户记录里最多的 sz_id g2_cod\n", | ||
" temp = transaction_history_data.groupby(['id', 'sz_id'], as_index=False)['prt_dt'].count()\\\n", | ||
" .sort_values(['id', 'prt_dt'], ascending=[True, False]).groupby('id').apply(lambda x: x['sz_id'].values[0])\\\n", | ||
" .reset_index().rename(columns={0:'sz_id'})\n", | ||
" data = pd.merge(data, temp, on=['id'], how='left')\n", | ||
" data['sz_id'] = data['sz_id'].astype('category').cat.codes\n", | ||
"\n", | ||
" temp = transaction_history_data.groupby(['id', 'g2_cod'], as_index=False)['prt_dt'].count()\\\n", | ||
" .sort_values(['id', 'prt_dt'], ascending=[True, False]).groupby('id').apply(lambda x: x['g2_cod'].values[0])\\\n", | ||
" .reset_index().rename(columns={0:'g2_cod'})\n", | ||
" data = pd.merge(data, temp, on=['id'], how='left')\n", | ||
" data['g2_cod'] = data['g2_cod'].astype('category').cat.codes\n", | ||
"\n", | ||
" ### 用户交易条目数和交易天数 平均每天交易数\n", | ||
" temp = transaction_history_data.groupby('id', as_index=False)['prt_dt'].agg({'user_trans_count':'count'})\n", | ||
" data = pd.merge(data, temp, on=['id'], how='left')\n", | ||
"\n", | ||
" temp = transaction_history_data.groupby('id', as_index=False)['prt_dt'].agg({'user_trans_day_count':'nunique'})\n", | ||
" data = pd.merge(data, temp, on=['id'], how='left')\n", | ||
"\n", | ||
" data['user_trans_day_mean'] = data['user_trans_count']/data['user_trans_day_count']\n", | ||
"\n", | ||
" ## 用户 总交易额 收入 支出 条目、天平均\n", | ||
" temp = transaction_history_data[transaction_history_data['rmb_amt']>=0].groupby('id', as_index=False)['rmb_amt'].agg({'user_sr_sum':'sum'})\n", | ||
" data = pd.merge(data, temp, on=['id'], how='left')\n", | ||
"\n", | ||
" temp = transaction_history_data[transaction_history_data['rmb_amt']<0].groupby('id', as_index=False)['rmb_amt'].agg({'user_zc_sum':'sum'})\n", | ||
" data = pd.merge(data, temp, on=['id'], how='left')\n", | ||
"\n", | ||
" data['user_sr_+_zc'] = data['user_sr_sum'] + data['user_zc_sum']\n", | ||
" data['user_sr_-_zc'] = data['user_sr_sum'] - data['user_zc_sum']\n", | ||
"\n", | ||
" for i in ['user_sr_sum', 'user_zc_sum', 'user_sr_+_zc', 'user_sr_-_zc']:\n", | ||
" col = i + '/trans'\n", | ||
" data[col] = data[i]/data['user_trans_count']\n", | ||
" col = i + '/days'\n", | ||
" data[col] = data[i]/data['user_trans_day_count']\n", | ||
"\n", | ||
" ## 用户 总交易额 收入 支出 均值,方差 统计值\n", | ||
" temp = transaction_history_data.groupby('id', as_index=False)['rmb_amt']\\\n", | ||
" .agg({'user_rmb_mean':'mean', 'user_rmb_std':'std'})\n", | ||
" data = pd.merge(data, temp, on=['id'], how='left')\n", | ||
"\n", | ||
" temp = transaction_history_data[transaction_history_data['rmb_amt']>=0].groupby('id', as_index=False)['rmb_amt']\\\n", | ||
" .agg({'user_sr_mean':'mean', 'user_sr_std':'std'})\n", | ||
" data = pd.merge(data, temp, on=['id'], how='left')\n", | ||
"\n", | ||
" temp = transaction_history_data[transaction_history_data['rmb_amt']<0].groupby('id', as_index=False)['rmb_amt']\\\n", | ||
" .agg({'user_zc_mean':'mean', 'user_zc_std':'std'})\n", | ||
" data = pd.merge(data, temp, on=['id'], how='left')\n", | ||
"\n", | ||
" ## 用户 sz三种不同类型计数占比\n", | ||
" temp = transaction_history_data.groupby(['id', 'cat1'])['prt_dt'].count().unstack().reset_index()\\\n", | ||
" .rename(columns={0:'user_cat1_0_count', 1:'user_cat1_1_count', 2:'user_cat1_2_count'})\n", | ||
" data = pd.merge(data, temp, on=['id'], how='left')\n", | ||
"\n", | ||
" for i in ['user_cat1_0_count', 'user_cat1_1_count', 'user_cat1_2_count']:\n", | ||
" col = i.replace('count', 'ratio')\n", | ||
" data[col] = data[i]/data['user_trans_count']\n", | ||
"\n", | ||
" ## 用户 交易的第一天和最后一天\n", | ||
" temp = transaction_history_data.groupby('id', as_index=False)['prt_dt'].agg({'user_first_day':'min', 'user_last_day':'max'})\n", | ||
" data = pd.merge(data, temp, on=['id'], how='left')\n", | ||
"\n", | ||
" data['user_first_day'] = data['user_first_day'].astype('category').cat.codes\n", | ||
" data['user_last_day'] = data['user_last_day'].astype('category').cat.codes\n", | ||
"\n", | ||
" ## 用户 交易记录 sz_id 占比\n", | ||
" temp = transaction_history_data.groupby(['id', 'sz_id'], as_index=False)['prt_dt'].count()\n", | ||
" temp = pd.merge(temp, transaction_history_data[['id', 'prt_dt']].groupby('id', as_index=False)['prt_dt'].agg({'sz_id_ratio': 'count'}),\\\n", | ||
" on=['id'], how='left')\n", | ||
" temp['sz_id_ratio'] = temp['prt_dt']/temp['sz_id_ratio']\n", | ||
" temp = pd.pivot_table(temp[['id', 'sz_id', 'sz_id_ratio']], index=['id', 'sz_id']).unstack().reset_index().fillna(0)\n", | ||
" data = pd.merge(data, temp, on=['id'], how='left')\n", | ||
"\n", | ||
" ## 用户 交易记录 g2_cod 占比\n", | ||
" temp = transaction_history_data.groupby(['id', 'g2_cod'], as_index=False)['prt_dt'].count()\n", | ||
" temp = pd.merge(temp, transaction_history_data[['id', 'prt_dt']].groupby('id', as_index=False)['prt_dt'].agg({'g2_cod_ratio': 'count'}),\\\n", | ||
" on=['id'], how='left')\n", | ||
" temp['g2_cod_ratio'] = temp['prt_dt']/temp['g2_cod_ratio']\n", | ||
" temp = pd.pivot_table(temp[['id', 'g2_cod', 'g2_cod_ratio']], index=['id', 'g2_cod']).unstack().reset_index().fillna(0)\n", | ||
" data = pd.merge(data, temp, on=['id'], how='left')\n", | ||
" \n", | ||
" ## 用户余额\n", | ||
" if data_name == 'train':\n", | ||
" data = pd.merge(data, cuts_inf[['id', 'aum227']], on=['id'], how='left').rename(columns={'aum227':'aum_last'})\n", | ||
" else:\n", | ||
" data = pd.merge(data, cuts_inf[['id', 'aum306']], on=['id'], how='left').rename(columns={'aum306':'aum_last'})\n", | ||
" \n", | ||
" ## 用户 交易记录 sz_id 金额求和\n", | ||
" temp = transaction_history_data.groupby(['id', 'sz_id'], as_index=False)['rmb_amt'].agg({'sz_id_amt':'sum'})\n", | ||
" temp = pd.pivot_table(temp[['id', 'sz_id', 'sz_id_amt']], index=['id', 'sz_id']).unstack().reset_index().fillna(0)\n", | ||
" data = pd.merge(data, temp, on=['id'], how='left')\n", | ||
" \n", | ||
" ## 用户 交易记录 g2_cod 金额求和\n", | ||
" temp = transaction_history_data.groupby(['id', 'g2_cod'], as_index=False)['rmb_amt'].agg({'g2_cod_amt':'sum'})\n", | ||
" temp = pd.pivot_table(temp[['id', 'g2_cod', 'g2_cod_amt']], index=['id', 'g2_cod']).unstack().reset_index().fillna(0)\n", | ||
" data = pd.merge(data, temp, on=['id'], how='left')\n", | ||
" \n", | ||
" return data" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"## 1-1 ~ 2-27 训练集提取特征 1-1 ~ 3-6 测试集提取特征\n", | ||
"train = get_feature('2019-02-27', 'train')\n", | ||
"test = get_feature('2019-03-06', 'test')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"## 处理列名格式\n", | ||
"rename_col = []\n", | ||
"for i in train.columns:\n", | ||
" if isinstance(i, tuple):\n", | ||
" rename_col.append(i[0]+'_'+str(i[1]))\n", | ||
" else:\n", | ||
" rename_col.append(i)\n", | ||
"train.columns = rename_col\n", | ||
"\n", | ||
"rename_col = []\n", | ||
"for i in test.columns:\n", | ||
" if isinstance(i, tuple):\n", | ||
" rename_col.append(i[0]+'_'+str(i[1]))\n", | ||
" else:\n", | ||
" rename_col.append(i)\n", | ||
"test.columns = rename_col" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"y = train['click_w228'].values" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"## 模型用到的210个特征\n", | ||
"## 具体筛选方式是先跑一折xgb模型,保留特征重要度大于10的210个特征,后期固定下来所以略去具体的训练步骤\n", | ||
"col_lst = [ i for i in train.columns if i in [\n", | ||
" 'aum_last', 'age', 'user_cat1_1_ratio', 'user_trans_day_mean','sz_id_ratio_28', 'sz_id_amt_31', 'user_cat1_0_ratio',\n", | ||
" 'user_sr_std', 'user_cat1_2_ratio', 'user_zc_std', 'sz_id_amt_53','user_sr_+_zc', 'user_cat1_2_count', 'user_sr_+_zc/trans',\n", | ||
" 'user_first_day', 'g2_cod_amt_221', 'user_cat1_1_count', 'sz_id_ratio_31', 'sz_id_ratio_53', 'user_sr_+_zc/days',\n", | ||
" 'sz_id_ratio_15', 'user_sr_mean', 'user_cat1_0_count','g2_cod_ratio_239', 'user_last_day', 'sz_id_amt_28',\n", | ||
" 'g2_cod_ratio_221', 'sz_id_ratio_32', 'g2_cod_ratio_223','g2_cod_ratio_203', 'sz_id_ratio_39', 'sz_id_amt_15',\n", | ||
" 'sz_id_ratio_42', 'sz_id_ratio_52', 'g2_cod_amt_265','user_zc_sum', 'user_rmb_mean', 'sz_id_amt_52', 'sz_id_amt_54',\n", | ||
" 'sz_id_ratio_40', 'g2_cod_amt_223', 'g2_cod_ratio_201','user_sr_sum', 'user_rmb_std', 'sz_id_ratio_54', 'g2_cod_amt_201',\n", | ||
" 'g2_cod_amt_243', 'g2_cod_ratio_243', 'sz_id_amt_32','g2_cod_amt_203', 'sz_id_ratio_45', 'sz_id_ratio_1',\n", | ||
" 'user_trans_count', 'sz_id_amt_10', 'g2_cod_ratio_158','user_trans_day_count', 'sz_id_amt_42', 'sz_id_ratio_30',\n", | ||
" 'g2_cod_ratio_117', 'sz_id_ratio_27', 'g2_cod_amt_306','g2_cod_ratio_265', 'user_sr_sum/days', 'sz_id_amt_45',\n", | ||
" 'user_zc_sum/days', 'user_sr_-_zc/days', 'user_sr_sum/trans','sz_id_amt_40', 'user_zc_mean', 'sz_id_amt_33', 'sz_id_ratio_7',\n", | ||
" 'sz_id_ratio_33', 'sz_id_amt_39', 'sz_id_ratio_16', 'g2_cod_ratio_304', 'sz_id_ratio_2', 'g2_cod_ratio_306',\n", | ||
" 'g2_cod_ratio_266', 'user_sr_-_zc', 'g2_cod_ratio_300','g2_cod_ratio_303', 'sz_id_ratio_3', 'user_zc_sum/trans',\n", | ||
" 'sz_id_amt_27', 'g2_cod_amt_253', 'g2_cod_amt_304','g2_cod_amt_117', 'sz_id_ratio_10', 'g2_cod_amt_300', 'gender',\n", | ||
" 'g2_cod_ratio_119', 'g2_cod_amt_303', 'g2_cod_amt_112','sz_id_ratio_19', 'g2_cod_ratio_278', 'g2_cod_ratio_253',\n", | ||
" 'user_sr_-_zc/trans', 'sz_id', 'g2_cod_ratio_345', 'g2_cod','g2_cod_ratio_129', 'sz_id_amt_7', 'sz_id_amt_24', 'sz_id_amt_30',\n", | ||
" 'g2_cod_ratio_222', 'sz_id_amt_1', 'g2_cod_amt_340','g2_cod_ratio_31', 'sz_id_amt_19', 'sz_id_amt_3', 'sz_id_amt_41',\n", | ||
" 'sz_id_amt_16', 'g2_cod_ratio_340', 'sz_id_ratio_29','g2_cod_ratio_185', 'sz_id_amt_46', 'g2_cod_amt_31',\n", | ||
" 'g2_cod_amt_158', 'sz_id_amt_11', 'g2_cod_ratio_112','g2_cod_ratio_121', 'sz_id_amt_2', 'sz_id_ratio_41',\n", | ||
" 'sz_id_ratio_24', 'sz_id_ratio_11', 'g2_cod_amt_346','sz_id_ratio_6', 'sz_id_ratio_46', 'g2_cod_amt_278',\n", | ||
" 'g2_cod_amt_119', 'g2_cod_amt_345', 'g2_cod_amt_239','sz_id_amt_29', 'g2_cod_ratio_346', 'g2_cod_amt_121',\n", | ||
" 'g2_cod_amt_157', 'sz_id_amt_37', 'sz_id_ratio_34','g2_cod_ratio_34', 'sz_id_amt_0', 'sz_id_ratio_26',\n", | ||
" 'g2_cod_amt_222', 'g2_cod_amt_185', 'sz_id_ratio_17','g2_cod_amt_34', 'g2_cod_ratio_314', 'sz_id_amt_6',\n", | ||
" 'g2_cod_ratio_187', 'sz_id_amt_12', 'sz_id_ratio_12','g2_cod_amt_268', 'g2_cod_amt_129', 'sz_id_ratio_36',\n", | ||
" 'sz_id_amt_17', 'sz_id_ratio_37', 'g2_cod_ratio_268', 'age_range','g2_cod_ratio_283', 'g2_cod_amt_41', 'g2_cod_amt_269',\n", | ||
" 'sz_id_ratio_23', 'sz_id_ratio_4', 'g2_cod_amt_187','g2_cod_ratio_262', 'g2_cod_ratio_293', 'g2_cod_ratio_128',\n", | ||
" 'sz_id_ratio_0', 'g2_cod_ratio_148', 'g2_cod_amt_283','sz_id_amt_55', 'g2_cod_ratio_130', 'sz_id_amt_49',\n", | ||
" 'g2_cod_amt_120', 'g2_cod_amt_293', 'g2_cod_amt_266', 'g2_cod_ratio_120', 'g2_cod_ratio_267', 'g2_cod_ratio_172',\n", | ||
" 'g2_cod_amt_314', 'g2_cod_ratio_263', 'sz_id_amt_34','sz_id_amt_26', 'g2_cod_amt_267', 'g2_cod_ratio_174',\n", | ||
" 'g2_cod_ratio_269', 'g2_cod_amt_134', 'sz_id_amt_4','sz_id_amt_36', 'sz_id_ratio_44', 'g2_cod_ratio_157',\n", | ||
" 'g2_cod_amt_174', 'sz_id_ratio_25', 'sz_id_amt_25', 'sz_id_amt_8','sz_id_amt_9', 'g2_cod_ratio_292', 'g2_cod_ratio_349',\n", | ||
" 'g2_cod_amt_263', 'g2_cod_amt_130', 'sz_id_amt_22','sz_id_ratio_9', 'g2_cod_amt_206', 'sz_id_ratio_22',\n", | ||
" 'g2_cod_ratio_350', 'sz_id_ratio_51', 'sz_id_amt_51','g2_cod_ratio_41', 'sz_id_ratio_47', 'sz_id_ratio_5',\n", | ||
" 'g2_cod_amt_172']]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"## 模型训练部分,分层抽样,10折cv只跑一折即可\n", | ||
"skf = StratifiedKFold(n_splits=10, random_state=42, shuffle=True)\n", | ||
"auc_list = []\n", | ||
"sub_list = []\n", | ||
"\n", | ||
"for k, (train_idx, test_idx) in enumerate(skf.split(y, y)):\n", | ||
" if k in [9]:\n", | ||
" print(k+1)\n", | ||
" \n", | ||
" X_train, X_test, y_train, y_test = \\\n", | ||
" train[col_lst].iloc[train_idx], train[col_lst].iloc[test_idx], y[train_idx], y[test_idx]\n", | ||
"\n", | ||
" xgb_train = xgb.DMatrix(X_train, label=y_train)\n", | ||
" xgb_val = xgb.DMatrix(X_test, label=y_test)\n", | ||
"\n", | ||
" \n", | ||
" params = {\n", | ||
" 'booster': 'gbtree',\n", | ||
" 'objective': 'binary:logistic',\n", | ||
" 'eval_metric': 'auc',\n", | ||
" 'gamma': 0.1,\n", | ||
" 'min_child_weight': 1.1,\n", | ||
" 'learning_rate' : 0.01,\n", | ||
" 'max_depth': 5,\n", | ||
" 'subsample': 0.8,\n", | ||
" 'colsample_bytree': 0.8,\n", | ||
" 'colsample_bylevel': 0.8,\n", | ||
" 'lambda': 10,\n", | ||
" 'verbose_eval': 1,\n", | ||
" 'nthread': 6,\n", | ||
" 'silent': 1,\n", | ||
" }\n", | ||
"\n", | ||
" evallist = [(xgb_train, 'train'), (xgb_val, 'eval')]\n", | ||
" gbm = xgb.train(params, xgb_train, 3000, evallist, early_stopping_rounds=60, verbose_eval=50) \n", | ||
" \n", | ||
" auc_list.append(gbm.best_score)\n", | ||
" sub_list.append(gbm.predict(xgb.DMatrix(test[col_lst]), ntree_limit=gbm.best_ntree_limit)) " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"test['score'] = sub_list[0]\n", | ||
"test[['id', 'score']].to_csv('ques2_210fea_cv1.csv', index=None)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python [conda env:py3]", | ||
"language": "python", | ||
"name": "conda-env-py3-py" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.5" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |