|
1 |
| -#! /usr/bin/python |
| 1 | +#!/usr/bin/python |
| 2 | + |
| 3 | +from __future__ import division |
| 4 | + |
2 | 5 | import numpy as np
|
3 | 6 | import xgboost as xgb
|
4 | 7 |
|
# label need to be 0 to num_class -1
# Column 33 holds '?' for missing age (mapped to 1, everything else to 0);
# column 34 holds the class label 1..6, shifted down to 0..5 for xgboost.
data = np.loadtxt('./dermatology.data', delimiter=',',
                  converters={33: lambda x: int(x == '?'),
                              34: lambda x: int(x) - 1})
sz = data.shape

# Plain 70/30 split on the row axis (no shuffling).
cut = int(sz[0] * 0.7)
train = data[:cut, :]
test = data[cut:, :]

# First 33 columns are features, column 34 is the label.
train_X, train_Y = train[:, :33], train[:, 34]
test_X, test_Y = test[:, :33], test[:, 34]

# Wrap both splits in xgboost's native data structure.
xg_train = xgb.DMatrix(train_X, label=train_Y)
xg_test = xgb.DMatrix(test_X, label=test_Y)
|
# setup parameters for xgboost (individual keys are filled in below)
param = dict()
|
|
# 4 worker threads; the dermatology task has 6 target classes (0..5).
param.update(nthread=4, num_class=6)

# Evaluate on both splits after every boosting round.
watchlist = list(zip((xg_train, xg_test), ('train', 'test')))
num_round = 5
bst = xgb.train(param, xg_train, num_round, watchlist)
# get prediction
# With multi:softmax the booster emits class ids directly, so the error
# rate is just the fraction of mismatched test rows.
pred = bst.predict(xg_test)
n_wrong = np.sum(pred != test_Y)
error_rate = n_wrong / test_Y.shape[0]
print('Test error using softmax = {}'.format(error_rate))
39 | 42 |
|
# do the same thing again, but output probabilities
param['objective'] = 'multi:softprob'
bst = xgb.train(param, xg_train, num_round, watchlist)
# Note: this convention has been changed since xgboost-unity
# get prediction, this is in 1D array, need reshape to (ndata, nclass)
pred_prob = bst.predict(xg_test).reshape(test_Y.shape[0], param['num_class'])
# Predicted class = column with the highest probability per row.
pred_label = np.argmax(pred_prob, axis=1)
# BUG FIX: compare pred_label (softprob argmax), not the stale `pred`
# array left over from the softmax section — otherwise this line just
# re-reports the softmax error rate.
error_rate = np.sum(pred_label != test_Y) / test_Y.shape[0]
print('Test error using softprob = {}'.format(error_rate))
0 commit comments