forked from scikit-learn/scikit-learn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot_mlp_training_curves.py
141 lines (122 loc) · 3.97 KB
/
plot_mlp_training_curves.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
"""
========================================================
Compare Stochastic learning strategies for MLPClassifier
========================================================
This example visualizes some training loss curves for different stochastic
learning strategies, including SGD and Adam. Because of time-constraints, we
use several small datasets, for which L-BFGS might be more suitable. The
general trend shown in these examples seems to carry over to larger datasets,
however.
Note that those results can be highly dependent on the value of
``learning_rate_init``.
"""
import warnings
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn import datasets
from sklearn.exceptions import ConvergenceWarning
# different learning rate schedules and momentum parameters
params = [
{
"solver": "sgd",
"learning_rate": "constant",
"momentum": 0,
"learning_rate_init": 0.2,
},
{
"solver": "sgd",
"learning_rate": "constant",
"momentum": 0.9,
"nesterovs_momentum": False,
"learning_rate_init": 0.2,
},
{
"solver": "sgd",
"learning_rate": "constant",
"momentum": 0.9,
"nesterovs_momentum": True,
"learning_rate_init": 0.2,
},
{
"solver": "sgd",
"learning_rate": "invscaling",
"momentum": 0,
"learning_rate_init": 0.2,
},
{
"solver": "sgd",
"learning_rate": "invscaling",
"momentum": 0.9,
"nesterovs_momentum": True,
"learning_rate_init": 0.2,
},
{
"solver": "sgd",
"learning_rate": "invscaling",
"momentum": 0.9,
"nesterovs_momentum": False,
"learning_rate_init": 0.2,
},
{"solver": "adam", "learning_rate_init": 0.01},
]
labels = [
"constant learning-rate",
"constant with momentum",
"constant with Nesterov's momentum",
"inv-scaling learning-rate",
"inv-scaling with momentum",
"inv-scaling with Nesterov's momentum",
"adam",
]
plot_args = [
{"c": "red", "linestyle": "-"},
{"c": "green", "linestyle": "-"},
{"c": "blue", "linestyle": "-"},
{"c": "red", "linestyle": "--"},
{"c": "green", "linestyle": "--"},
{"c": "blue", "linestyle": "--"},
{"c": "black", "linestyle": "-"},
]
def plot_on_dataset(X, y, ax, name):
# for each dataset, plot learning for each learning strategy
print("\nlearning on dataset %s" % name)
ax.set_title(name)
X = MinMaxScaler().fit_transform(X)
mlps = []
if name == "digits":
# digits is larger but converges fairly quickly
max_iter = 15
else:
max_iter = 400
for label, param in zip(labels, params):
print("training: %s" % label)
mlp = MLPClassifier(random_state=0, max_iter=max_iter, **param)
# some parameter combinations will not converge as can be seen on the
# plots so they are ignored here
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore", category=ConvergenceWarning, module="sklearn"
)
mlp.fit(X, y)
mlps.append(mlp)
print("Training set score: %f" % mlp.score(X, y))
print("Training set loss: %f" % mlp.loss_)
for mlp, label, args in zip(mlps, labels, plot_args):
ax.plot(mlp.loss_curve_, label=label, **args)
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
# load / generate some toy datasets
iris = datasets.load_iris()
X_digits, y_digits = datasets.load_digits(return_X_y=True)
data_sets = [
(iris.data, iris.target),
(X_digits, y_digits),
datasets.make_circles(noise=0.2, factor=0.5, random_state=1),
datasets.make_moons(noise=0.3, random_state=0),
]
for ax, data, name in zip(
axes.ravel(), data_sets, ["iris", "digits", "circles", "moons"]
):
plot_on_dataset(*data, ax=ax, name=name)
fig.legend(ax.get_lines(), labels, ncol=3, loc="upper center")
plt.show()