-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfigure1_2.py
63 lines (48 loc) · 1.8 KB
/
figure1_2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# This code is supporting material for the book
# Building Machine Learning Systems with Python
# by Willi Richert and Luis Pedro Coelho
# published by PACKT Publishing
#
# It is made available under the MIT License
import numpy as np
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from matplotlib import pyplot as plt
# Load the Boston housing data set.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this script presumably needs an older release -- confirm the pinned
# version.
boston = load_boston()

# Index number five is the number of rooms
rooms = boston.data[:, 5]

fig, ax = plt.subplots()
ax.scatter(rooms, boston.target)
ax.set_xlabel("Average number of rooms (RM)")
ax.set_ylabel("House Price")

# fit (used below) takes a two-dimensional array as input. We use np.atleast_2d
# to convert from one to two dimensional, then transpose to make sure that the
# format matches:
x = np.transpose(np.atleast_2d(rooms))
y = boston.target

# First model: no intercept term is fitted.
lr = LinearRegression(fit_intercept=False)
lr.fit(x, y)

# Draw the fitted line from the origin to one unit past the largest RM value.
# predict() needs the same two-dimensional (n_samples, n_features) layout as
# fit(), so the single end point is wrapped as [[x_end]] and the one resulting
# prediction is unpacked to a scalar before plotting.
x_end = rooms.max() + 1
y_end = lr.predict([[x_end]])[0]
ax.plot([0, x_end], [0, y_end], '-', lw=4)
fig.savefig('Figure1.png')

# Error of the no-intercept model, measured on the data it was fitted to.
mse = mean_squared_error(y, lr.predict(x))
rmse = np.sqrt(mse)
print('RMSE (no intercept): {}'.format(rmse))
# Second model: same data, but this time the regression also fits an intercept.
lr = LinearRegression(fit_intercept=True)
lr.fit(x, y)

fig, ax = plt.subplots()
ax.set_xlabel("Average number of rooms (RM)")
ax.set_ylabel("House Price")
ax.scatter(boston.data[:, 5], boston.target)

# The fitted line is drawn between the smallest and largest values in x.
xmin, xmax = x.min(), x.max()
line_ends = lr.predict([[xmin], [xmax]])
ax.plot([xmin, xmax], line_ends, '-', lw=4)
fig.savefig('Figure2.png')

# Report how well the model fits the data it was trained on.
train_pred = lr.predict(x)
mse = mean_squared_error(y, train_pred)
print("Mean squared error (of training data): {:.3}".format(mse))
rmse = np.sqrt(mse)
print("Root mean squared error (of training data): {:.3}".format(rmse))
cod = r2_score(y, train_pred)
print('COD (on training data): {:.2}'.format(cod))