forked from dmlc/xgboost
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request dmlc#528 from terrytangyuan/test
More Unit Tests for Python Package
- Loading branch information
Showing
5 changed files
with
102 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ | |
|
||
|
||
dpath = 'demo/data/' | ||
rng = np.random.RandomState(1994) | ||
|
||
class TestBasic(unittest.TestCase): | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import xgboost as xgb | ||
import numpy as np | ||
from sklearn.datasets import load_digits | ||
from sklearn.cross_validation import KFold, train_test_split | ||
|
||
rng = np.random.RandomState(1994) | ||
|
||
def test_early_stopping_nonparallel():
    """Placeholder for the non-parallel early-stopping test.

    The actual test body is disabled for now and only announces itself;
    the commented-out steps below show the intended flow.
    """
    # Intended flow (disabled until revisited):
    # digits = load_digits(2)
    # X = digits['data']
    # y = digits['target']
    # X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    # clf = xgb.XGBClassifier()
    # clf.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="auc",
    #         eval_set=[(X_test, y_test)])
    print("This test will be re-visited later. ")

# TODO: parallel test for early stopping
# TODO: comment out for now. Will re-visit later
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import xgboost as xgb | ||
import numpy as np | ||
from sklearn.cross_validation import KFold, train_test_split | ||
from sklearn.metrics import mean_squared_error | ||
from sklearn.grid_search import GridSearchCV | ||
from sklearn.datasets import load_iris, load_digits, load_boston | ||
|
||
rng = np.random.RandomState(1994) | ||
|
||
def test_binary_classification():
    """Binary classification on the two-class digits subset.

    Trains an XGBClassifier on each half of a 2-fold split and asserts
    that the held-out misclassification rate stays below 10%.
    """
    digits = load_digits(2)
    y = digits['target']
    X = digits['data']
    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
    for train_index, test_index in kf:
        xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
        preds = xgb_model.predict(X[test_index])
        labels = y[test_index]
        # Threshold each prediction at 0.5 and count disagreements.
        # zip over (prediction, label) pairs replaces the original
        # index-based range(len(preds)) loop.
        err = sum(1 for p, l in zip(preds, labels)
                  if int(p > 0.5) != l) / float(len(preds))
        assert err < 0.1
|
||
def test_multiclass_classification():
    """Multi-class classification on iris (3 classes).

    Bug fix: the original computed the error as
    ``int(preds[i] > 0.5) != labels[i]``, which thresholds *class labels*
    at 0.5 — collapsing classes 1 and 2 into 1 and therefore counting
    every correct class-2 prediction as an error. For a multi-class
    problem the predicted label must be compared to the true label
    directly.
    """
    iris = load_iris()
    y = iris['target']
    X = iris['data']
    kf = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
    for train_index, test_index in kf:
        xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
        preds = xgb_model.predict(X[test_index])
        labels = y[test_index]
        # Direct label comparison handles all three iris classes.
        err = sum(1 for p, l in zip(preds, labels)
                  if p != l) / float(len(preds))
        assert err < 0.4
|
||
def test_boston_housing_regression():
    """Regression on the Boston housing data.

    Fits an XGBRegressor on each half of a 2-fold split and asserts the
    mean squared error on the held-out fold stays below 15.
    """
    boston = load_boston()
    X = boston['data']
    y = boston['target']
    folds = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)
    for tr_idx, te_idx in folds:
        model = xgb.XGBRegressor().fit(X[tr_idx], y[tr_idx])
        predictions = model.predict(X[te_idx])
        expected = y[te_idx]
        assert mean_squared_error(predictions, expected) < 15
|
||
def test_parameter_tuning():
    """Grid-search XGBRegressor hyper-parameters on Boston housing.

    Checks the best cross-validated score bound and that the winning
    parameter combination matches the known-good values.
    """
    boston = load_boston()
    X = boston['data']
    y = boston['target']
    regressor = xgb.XGBRegressor()
    param_grid = {'max_depth': [2, 4, 6],
                  'n_estimators': [50, 100, 200]}
    search = GridSearchCV(regressor, param_grid, verbose=1)
    search.fit(X, y)
    assert search.best_score_ < 0.7
    assert search.best_params_ == {'n_estimators': 100, 'max_depth': 4}
|
||
|