ENH rename l-bfgs to lbfgs in MLP for the sake of consistency
ogrisel committed Sep 12, 2016
1 parent 5a018a3 commit f02fce8
Showing 3 changed files with 28 additions and 28 deletions.
8 changes: 4 additions & 4 deletions doc/modules/neural_networks_supervised.rst
@@ -86,9 +86,9 @@ training samples::
>>> from sklearn.neural_network import MLPClassifier
>>> X = [[0., 0.], [1., 1.]]
>>> y = [0, 1]
>>> clf = MLPClassifier(algorithm='l-bfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
>>> clf = MLPClassifier(algorithm='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
>>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE
MLPClassifier(activation='relu', algorithm='l-bfgs', alpha=1e-05,
MLPClassifier(activation='relu', algorithm='lbfgs', alpha=1e-05,
batch_size='auto', beta_1=0.9, beta_2=0.999, early_stopping=False,
epsilon=1e-08, hidden_layer_sizes=(5, 2), learning_rate='constant',
learning_rate_init=0.001, max_iter=200, momentum=0.9,
@@ -132,10 +132,10 @@ indices where the value is `1` represents the assigned classes of that sample::

>>> X = [[0., 0.], [1., 1.]]
>>> y = [[0, 1], [1, 1]]
>>> clf = MLPClassifier(algorithm='l-bfgs', alpha=1e-5,
>>> clf = MLPClassifier(algorithm='lbfgs', alpha=1e-5,
... hidden_layer_sizes=(15,), random_state=1)
>>> clf.fit(X, y)
MLPClassifier(activation='relu', algorithm='l-bfgs', alpha=1e-05,
MLPClassifier(activation='relu', algorithm='lbfgs', alpha=1e-05,
batch_size='auto', beta_1=0.9, beta_2=0.999, early_stopping=False,
epsilon=1e-08, hidden_layer_sizes=(15,), learning_rate='constant',
learning_rate_init=0.001, max_iter=200, momentum=0.9,
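For reference, the renamed value is used exactly like the old one. Below is a minimal sketch based on the documentation example above; the predict call and its two query points are illustrative additions, not part of the diff:

from sklearn.neural_network import MLPClassifier

# Same toy data and settings as the documentation example above,
# with the new 'lbfgs' spelling of the algorithm parameter.
X = [[0., 0.], [1., 1.]]
y = [0, 1]
clf = MLPClassifier(algorithm='lbfgs', alpha=1e-5,
                    hidden_layer_sizes=(5, 2), random_state=1)
clf.fit(X, y)

# Illustrative query points; the exact predictions depend on the fitted weights.
print(clf.predict([[2., 2.], [-1., -2.]]))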
30 changes: 15 additions & 15 deletions sklearn/neural_network/multilayer_perceptron.py
@@ -133,7 +133,7 @@ def _loss_grad_lbfgs(self, packed_coef_inter, X, y, activations, deltas,
with respect to the different parameters given in the initialization.
Returned gradients are packed in a single vector so it can be used
in l-bfgs
in lbfgs
Parameters
----------
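Aside: the "packed in a single vector" requirement mentioned in this docstring comes from SciPy's L-BFGS interface (scipy.optimize.fmin_l_bfgs_b operates on a flat parameter vector). A minimal illustration with invented shapes, not code from the library:

import numpy as np

# Two layers' worth of gradients with made-up shapes, flattened into the
# single vector that the L-BFGS routine expects.
coef_grads = [np.zeros((2, 5)), np.zeros((5, 1))]
intercept_grads = [np.zeros(5), np.zeros(1)]
packed = np.hstack([g.ravel() for g in coef_grads + intercept_grads])
print(packed.shape)  # (21,) == 2*5 + 5*1 + 5 + 1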
@@ -344,8 +344,8 @@ def _fit(self, X, y, incremental=False):
# First time training the model
self._initialize(y, layer_units)

# l-bfgs does not support mini-batches
if self.algorithm == 'l-bfgs':
# lbfgs does not support mini-batches
if self.algorithm == 'lbfgs':
batch_size = n_samples
elif self.batch_size == 'auto':
batch_size = min(200, n_samples)
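The branch above can be restated as a small standalone helper; this is only an illustration of the selection logic shown in the diff, not code from the library:

def effective_batch_size(algorithm, batch_size, n_samples):
    # 'lbfgs' always works on the full batch; it has no mini-batch mode.
    if algorithm == 'lbfgs':
        return n_samples
    # Stochastic algorithms default to min(200, n_samples) when 'auto'.
    if batch_size == 'auto':
        return min(200, n_samples)
    return batch_size

print(effective_batch_size('lbfgs', 'auto', 1000))  # 1000
print(effective_batch_size('adam', 'auto', 1000))   # 200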
@@ -374,7 +374,7 @@ def _fit(self, X, y, incremental=False):
intercept_grads, layer_units, incremental)

# Run the LBFGS algorithm
elif self.algorithm == 'l-bfgs':
elif self.algorithm == 'lbfgs':
self._fit_lbfgs(X, y, activations, deltas, coef_grads,
intercept_grads, layer_units)
return self
@@ -421,7 +421,7 @@ def _validate_hyperparameters(self):
if self.learning_rate not in ["constant", "invscaling", "adaptive"]:
raise ValueError("learning rate %s is not supported. " %
self.learning_rate)
supported_algorithms = _STOCHASTIC_ALGOS + ["l-bfgs"]
supported_algorithms = _STOCHASTIC_ALGOS + ["lbfgs"]
if self.algorithm not in supported_algorithms:
raise ValueError("The algorithm %s is not supported. "
" Expected one of: %s" %
@@ -679,7 +679,7 @@ def _predict(self, X):
class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
"""Multi-layer Perceptron classifier.
This algorithm optimizes the log-loss function using l-bfgs or gradient
This algorithm optimizes the log-loss function using lbfgs or gradient
descent.
Parameters
@@ -703,10 +703,10 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
- 'relu', the rectified linear unit function,
returns f(x) = max(0, x)
algorithm : {'l-bfgs', 'sgd', 'adam'}, default 'adam'
algorithm : {'lbfgs', 'sgd', 'adam'}, default 'adam'
The algorithm for weight optimization.
- 'l-bfgs' is an optimization algorithm in the family of
- 'lbfgs' is an optimization algorithm in the family of
quasi-Newton methods.
- 'sgd' refers to stochastic gradient descent.
@@ -717,15 +717,15 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
Note: The default algorithm 'adam' works pretty well on relatively
large datasets (with thousands of training samples or more) in terms of
both training time and validation score.
For small datasets, however, 'l-bfgs' can converge faster and perform
For small datasets, however, 'lbfgs' can converge faster and perform
better.
alpha : float, optional, default 0.0001
L2 penalty (regularization term) parameter.
batch_size : int, optional, default 'auto'
Size of minibatches for stochastic optimizers.
If the algorithm is 'l-bfgs', the classifier will not use minibatch.
If the algorithm is 'lbfgs', the classifier will not use minibatch.
When set to "auto", `batch_size=min(200, n_samples)`
learning_rate : {'constant', 'invscaling', 'adaptive'}, default 'constant'
@@ -1021,7 +1021,7 @@ def predict_proba(self, X):
class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin):
"""Multi-layer Perceptron regressor.
This algorithm optimizes the squared-loss using l-bfgs or gradient descent.
This algorithm optimizes the squared-loss using lbfgs or gradient descent.
Parameters
----------
@@ -1044,10 +1044,10 @@ class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin):
- 'relu', the rectified linear unit function,
returns f(x) = max(0, x)
algorithm : {'l-bfgs', 'sgd', 'adam'}, default 'adam'
algorithm : {'lbfgs', 'sgd', 'adam'}, default 'adam'
The algorithm for weight optimization.
- 'l-bfgs' is an optimization algorithm in the family of
- 'lbfgs' is an optimization algorithm in the family of
quasi-Newton methods.
- 'sgd' refers to stochastic gradient descent.
@@ -1058,15 +1058,15 @@ class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin):
Note: The default algorithm 'adam' works pretty well on relatively
large datasets (with thousands of training samples or more) in terms of
both training time and validation score.
For small datasets, however, 'l-bfgs' can converge faster and perform
For small datasets, however, 'lbfgs' can converge faster and perform
better.
alpha : float, optional, default 0.0001
L2 penalty (regularization term) parameter.
batch_size : int, optional, default 'auto'
Size of minibatches for stochastic optimizers.
If the algorithm is 'l-bfgs', the classifier will not use minibatch.
If the algorithm is 'lbfgs', the classifier will not use minibatch.
When set to "auto", `batch_size=min(200, n_samples)`
learning_rate : {'constant', 'invscaling', 'adaptive'}, default 'constant'
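Taken together, the changes in this file mean the new spelling is accepted while the old hyphenated one is rejected when hyperparameters are validated at fit time. A minimal sketch of the expected behaviour, using toy data; the exact error message may differ:

from sklearn.neural_network import MLPClassifier

X = [[0., 0.], [1., 1.]]
y = [0, 1]

# New spelling: accepted.
MLPClassifier(algorithm='lbfgs', hidden_layer_sizes=(5,), random_state=1).fit(X, y)

# Old spelling: rejected during hyperparameter validation inside fit.
try:
    MLPClassifier(algorithm='l-bfgs').fit(X, y)
except ValueError as exc:
    print(exc)  # e.g. "The algorithm l-bfgs is not supported. ..."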
18 changes: 9 additions & 9 deletions sklearn/neural_network/tests/test_mlp.py
@@ -179,7 +179,7 @@ def test_gradient():

for activation in ACTIVATION_TYPES:
mlp = MLPClassifier(activation=activation, hidden_layer_sizes=10,
algorithm='l-bfgs', alpha=1e-5,
algorithm='lbfgs', alpha=1e-5,
learning_rate_init=0.2, max_iter=1,
random_state=1)
mlp.fit(X, y)
@@ -238,7 +238,7 @@ def test_lbfgs_classification():
expected_shape_dtype = (X_test.shape[0], y_train.dtype.kind)

for activation in ACTIVATION_TYPES:
mlp = MLPClassifier(algorithm='l-bfgs', hidden_layer_sizes=50,
mlp = MLPClassifier(algorithm='lbfgs', hidden_layer_sizes=50,
max_iter=150, shuffle=True, random_state=1,
activation=activation)
mlp.fit(X_train, y_train)
@@ -253,7 +253,7 @@ def test_lbfgs_regression():
X = Xboston
y = yboston
for activation in ACTIVATION_TYPES:
mlp = MLPRegressor(algorithm='l-bfgs', hidden_layer_sizes=50,
mlp = MLPRegressor(algorithm='lbfgs', hidden_layer_sizes=50,
max_iter=150, shuffle=True, random_state=1,
activation=activation)
mlp.fit(X, y)
@@ -290,7 +290,7 @@ def test_multilabel_classification():
# test fit method
X, y = make_multilabel_classification(n_samples=50, random_state=0,
return_indicator=True)
mlp = MLPClassifier(algorithm='l-bfgs', hidden_layer_sizes=50, alpha=1e-5,
mlp = MLPClassifier(algorithm='lbfgs', hidden_layer_sizes=50, alpha=1e-5,
max_iter=150, random_state=0, activation='logistic',
learning_rate_init=0.2)
mlp.fit(X, y)
@@ -308,7 +308,7 @@ def test_multilabel_classification():
def test_multioutput_regression():
# Test that multi-output regression works as expected
X, y = make_regression(n_samples=200, n_targets=5)
mlp = MLPRegressor(algorithm='l-bfgs', hidden_layer_sizes=50, max_iter=200,
mlp = MLPRegressor(algorithm='lbfgs', hidden_layer_sizes=50, max_iter=200,
random_state=1)
mlp.fit(X, y)
assert_greater(mlp.score(X, y), 0.9)
@@ -383,8 +383,8 @@ def test_partial_fit_errors():
X, y,
classes=[2])

# l-bfgs doesn't support partial_fit
assert_false(hasattr(MLPClassifier(algorithm='l-bfgs'), 'partial_fit'))
# lbfgs doesn't support partial_fit
assert_false(hasattr(MLPClassifier(algorithm='lbfgs'), 'partial_fit'))


def test_params_errors():
@@ -466,7 +466,7 @@ def test_predict_proba_multilabel():
return_indicator=True)
n_samples, n_classes = Y.shape

clf = MLPClassifier(algorithm='l-bfgs', hidden_layer_sizes=30,
clf = MLPClassifier(algorithm='lbfgs', hidden_layer_sizes=30,
random_state=0)
clf.fit(X, Y)
y_proba = clf.predict_proba(X)
@@ -488,7 +488,7 @@ def test_sparse_matrices():
X = X_digits_binary[:50]
y = y_digits_binary[:50]
X_sparse = csr_matrix(X)
mlp = MLPClassifier(algorithm='l-bfgs', hidden_layer_sizes=15,
mlp = MLPClassifier(algorithm='lbfgs', hidden_layer_sizes=15,
random_state=1)
mlp.fit(X, y)
pred1 = mlp.predict(X)
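As test_partial_fit_errors checks, partial_fit is only exposed for the stochastic algorithms. A minimal sketch of what that means for user code; the 'sgd' case is an assumption based on the surrounding tests, not part of this diff:

from sklearn.neural_network import MLPClassifier

# With 'lbfgs' the attribute is not exposed at all (see the test above).
print(hasattr(MLPClassifier(algorithm='lbfgs'), 'partial_fit'))  # False
# With a stochastic algorithm such as 'sgd', partial_fit is expected to exist.
print(hasattr(MLPClassifier(algorithm='sgd'), 'partial_fit'))    # True (assumed)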
