"""
=====================
Polynomial Regression
=====================
This example demonstrates how linear regression with polynomial features can
be used to approximate nonlinear functions. The plot shows the function that
we want to approximate, which is part of the cosine function, along with
noisy samples drawn from it and the approximations of models whose polynomial
features have different degrees. A linear function (a polynomial of degree 1)
is not flexible enough to fit the training samples; this is called
**underfitting**. A polynomial of degree 4 approximates the true function
almost perfectly. For higher degrees, however, the model **overfits** the
training data, i.e. it learns the noise in the training samples.
"""
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression


def true_fun(X):
    return np.cos(1.5 * np.pi * X)


np.random.seed(0)

n_samples = 30
degrees = [1, 4, 15]

# Noisy samples drawn from the true function.
X = np.sort(np.random.rand(n_samples))
y = true_fun(X) + np.random.randn(n_samples) * 0.1
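
# Not part of the original example: a quick illustration of what the
# PolynomialFeatures step below does. A single input feature x is expanded
# into the columns [x, x**2, ..., x**degree]; include_bias=False drops the
# constant column.
print(PolynomialFeatures(degree=3, include_bias=False)
      .fit_transform(np.array([[2.0]])))  # -> [[2. 4. 8.]]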
plt.figure(figsize=(14, 4))
for i in range(len(degrees)):
    ax = plt.subplot(1, len(degrees), i + 1)
    plt.setp(ax, xticks=(), yticks=())

    # Fit a linear regression on polynomial features of the current degree.
    polynomial_features = PolynomialFeatures(degree=degrees[i],
                                             include_bias=False)
    linear_regression = LinearRegression()
    pipeline = Pipeline([("polynomial_features", polynomial_features),
                         ("linear_regression", linear_regression)])
    pipeline.fit(X[:, np.newaxis], y)

    # Evaluate the fitted model on a dense grid to draw a smooth curve.
    X_test = np.linspace(0, 1, 100)
    plt.plot(X_test, pipeline.predict(X_test[:, np.newaxis]), label="Model")
    plt.plot(X_test, true_fun(X_test), label="True function")
    plt.scatter(X, y, label="Samples")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.xlim((0, 1))
    plt.ylim((-2, 2))
    plt.legend(loc="best")
    plt.title("Degree %d" % degrees[i])
plt.show()
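
# Not part of the original example: a rough sketch of how one could quantify
# under- and overfitting numerically, using cross-validated mean squared
# error. Assumes a scikit-learn version that provides
# sklearn.model_selection.cross_val_score (>= 0.18).
from sklearn.model_selection import cross_val_score

for degree in degrees:
    model = Pipeline([("polynomial_features",
                       PolynomialFeatures(degree=degree, include_bias=False)),
                      ("linear_regression", LinearRegression())])
    # Negated because scikit-learn scorers follow a "higher is better"
    # convention, so the MSE scorer returns negative values.
    scores = cross_val_score(model, X[:, np.newaxis], y,
                             scoring="neg_mean_squared_error", cv=10)
    print("Degree %d: CV MSE = %.2e (+/- %.2e)"
          % (degree, -scores.mean(), scores.std()))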