-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlr10k.py
38 lines (29 loc) · 1.2 KB
/
lr10k.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# This code is supporting material for the book
# Building Machine Learning Systems with Python
# by Willi Richert and Luis Pedro Coelho
# published by PACKT Publishing
#
# It is made available under the MIT License
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import load_svmlight_file
from sklearn.linear_model import LinearRegression
from sklearn.cross_validation import KFold
# Whether to use Elastic nets (otherwise, ordinary linear regression is used)
# Load data:
data, target = load_svmlight_file('data/E2006.train')
lr = LinearRegression()
# Compute error on training data to demonstrate that we can obtain near perfect
# scores:
lr.fit(data, target)
pred = lr.predict(data)
print('RMSE on training, {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))
print('R2 on training, {:.2}'.format(r2_score(target, pred)))
print('')
pred = np.zeros_like(target)
kf = KFold(len(target), n_folds=5)
for train, test in kf:
lr.fit(data[train], target[train])
pred[test] = lr.predict(data[test])
print('RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))
print('R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))