ENH rename l-bfgs to lbfgs in MLP for the sake of consistency
ogrisel committed Sep 12, 2016
1 parent 5a018a3 commit f02fce8
Showing 3 changed files with 28 additions and 28 deletions.
8 changes: 4 additions & 4 deletions doc/modules/neural_networks_supervised.rst
@@ -86,9 +86,9 @@ training samples::
>>> from sklearn.neural_network import MLPClassifier
>>> X = [[0., 0.], [1., 1.]]
>>> y = [0, 1]
>>> clf = MLPClassifier(algorithm='l-bfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
>>> clf = MLPClassifier(algorithm='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
>>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE
MLPClassifier(activation='relu', algorithm='l-bfgs', alpha=1e-05,
MLPClassifier(activation='relu', algorithm='lbfgs', alpha=1e-05,
batch_size='auto', beta_1=0.9, beta_2=0.999, early_stopping=False,
epsilon=1e-08, hidden_layer_sizes=(5, 2), learning_rate='constant',
learning_rate_init=0.001, max_iter=200, momentum=0.9,
@@ -132,10 +132,10 @@ indices where the value is `1` represents the assigned classes of that sample::

>>> X = [[0., 0.], [1., 1.]]
>>> y = [[0, 1], [1, 1]]
>>> clf = MLPClassifier(algorithm='l-bfgs', alpha=1e-5,
>>> clf = MLPClassifier(algorithm='lbfgs', alpha=1e-5,
... hidden_layer_sizes=(15,), random_state=1)
>>> clf.fit(X, y)
MLPClassifier(activation='relu', algorithm='l-bfgs', alpha=1e-05,
MLPClassifier(activation='relu', algorithm='lbfgs', alpha=1e-05,
batch_size='auto', beta_1=0.9, beta_2=0.999, early_stopping=False,
epsilon=1e-08, hidden_layer_sizes=(15,), learning_rate='constant',
learning_rate_init=0.001, max_iter=200, momentum=0.9,
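For reference, the renamed value is used exactly like the old one. Below is a minimal sketch based on the documentation example above; the predict call and its two query points are illustrative additions, not part of the diff:

from sklearn.neural_network import MLPClassifier

# Same toy data and settings as the documentation example above,
# with the new 'lbfgs' spelling of the algorithm parameter.
X = [[0., 0.], [1., 1.]]
y = [0, 1]
clf = MLPClassifier(algorithm='lbfgs', alpha=1e-5,
                    hidden_layer_sizes=(5, 2), random_state=1)
clf.fit(X, y)

# Illustrative query points; the exact predictions depend on the fitted weights.
print(clf.predict([[2., 2.], [-1., -2.]]))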
30 changes: 15 additions & 15 deletions sklearn/neural_network/multilayer_perceptron.py
@@ -133,7 +133,7 @@ def _loss_grad_lbfgs(self, packed_coef_inter, X, y, activations, deltas,
with respect to the different parameters given in the initialization.
Returned gradients are packed in a single vector so it can be used
in l-bfgs
in lbfgs
Parameters
----------
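Aside: the "packed in a single vector" requirement mentioned in this docstring comes from SciPy's L-BFGS interface (scipy.optimize.fmin_l_bfgs_b operates on a flat parameter vector). A minimal illustration with invented shapes, not code from the library:

import numpy as np

# Two layers' worth of gradients with made-up shapes, flattened into the
# single vector that the L-BFGS routine expects.
coef_grads = [np.zeros((2, 5)), np.zeros((5, 1))]
intercept_grads = [np.zeros(5), np.zeros(1)]
packed = np.hstack([g.ravel() for g in coef_grads + intercept_grads])
print(packed.shape)  # (21,) == 2*5 + 5*1 + 5 + 1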
@@ -344,8 +344,8 @@ def _fit(self, X, y, incremental=False):
# First time training the model
self._initialize(y, layer_units)

# l-bfgs does not support mini-batches
if self.algorithm == 'l-bfgs':
# lbfgs does not support mini-batches
if self.algorithm == 'lbfgs':
batch_size = n_samples
elif self.batch_size == 'auto':
batch_size = min(200, n_samples)
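The branch above can be restated as a small standalone helper; this is only an illustration of the selection logic shown in the diff, not code from the library:

def effective_batch_size(algorithm, batch_size, n_samples):
    # 'lbfgs' always works on the full batch; it has no mini-batch mode.
    if algorithm == 'lbfgs':
        return n_samples
    # Stochastic algorithms default to min(200, n_samples) when 'auto'.
    if batch_size == 'auto':
        return min(200, n_samples)
    return batch_size

print(effective_batch_size('lbfgs', 'auto', 1000))  # 1000
print(effective_batch_size('adam', 'auto', 1000))   # 200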
@@ -374,7 +374,7 @@ def _fit(self, X, y, incremental=False):
intercept_grads, layer_units, incremental)

# Run the LBFGS algorithm
elif self.algorithm == 'l-bfgs':
elif self.algorithm == 'lbfgs':
self._fit_lbfgs(X, y, activations, deltas, coef_grads,
intercept_grads, layer_units)
return self
@@ -421,7 +421,7 @@ def _validate_hyperparameters(self):
if self.learning_rate not in ["constant", "invscaling", "adaptive"]:
raise ValueError("learning rate %s is not supported. " %
self.learning_rate)
supported_algorithms = _STOCHASTIC_ALGOS + ["l-bfgs"]
supported_algorithms = _STOCHASTIC_ALGOS + ["lbfgs"]
if self.algorithm not in supported_algorithms:
raise ValueError("The algorithm %s is not supported. "
" Expected one of: %s" %
@@ -679,7 +679,7 @@ def _predict(self, X):
class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
"""Multi-layer Perceptron classifier.
This algorithm optimizes the log-loss function using l-bfgs or gradient
This algorithm optimizes the log-loss function using lbfgs or gradient
descent.
Parameters
@@ -703,10 +703,10 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
- 'relu', the rectified linear unit function,
returns f(x) = max(0, x)
algorithm : {'l-bfgs', 'sgd', 'adam'}, default 'adam'
algorithm : {'lbfgs', 'sgd', 'adam'}, default 'adam'
The algorithm for weight optimization.
- 'l-bfgs' is an optimization algorithm in the family of
- 'lbfgs' is an optimization algorithm in the family of
quasi-Newton methods.
- 'sgd' refers to stochastic gradient descent.
@@ -717,15 +717,15 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
Note: The default algorithm 'adam' works pretty well on relatively
large datasets (with thousands of training samples or more) in terms of
both training time and validation score.
For small datasets, however, 'l-bfgs' can converge faster and perform
For small datasets, however, 'lbfgs' can converge faster and perform
better.
alpha : float, optional, default 0.0001
L2 penalty (regularization term) parameter.
batch_size : int, optional, default 'auto'
Size of minibatches for stochastic optimizers.
If the algorithm is 'l-bfgs', the classifier will not use minibatch.
If the algorithm is 'lbfgs', the classifier will not use minibatch.
When set to "auto", `batch_size=min(200, n_samples)`
learning_rate : {'constant', 'invscaling', 'adaptive'}, default 'constant'
@@ -1021,7 +1021,7 @@ def predict_proba(self, X):
class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin):
"""Multi-layer Perceptron regressor.
This algorithm optimizes the squared-loss using l-bfgs or gradient descent.
This algorithm optimizes the squared-loss using lbfgs or gradient descent.
Parameters
----------
@@ -1044,10 +1044,10 @@ class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin):
- 'relu', the rectified linear unit function,
returns f(x) = max(0, x)
algorithm : {'l-bfgs', 'sgd', 'adam'}, default 'adam'
algorithm : {'lbfgs', 'sgd', 'adam'}, default 'adam'
The algorithm for weight optimization.
- 'l-bfgs' is an optimization algorithm in the family of
- 'lbfgs' is an optimization algorithm in the family of
quasi-Newton methods.
- 'sgd' refers to stochastic gradient descent.
@@ -1058,15 +1058,15 @@ class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin):
Note: The default algorithm 'adam' works pretty well on relatively
large datasets (with thousands of training samples or more) in terms of
both training time and validation score.
For small datasets, however, 'l-bfgs' can converge faster and perform
For small datasets, however, 'lbfgs' can converge faster and perform
better.
alpha : float, optional, default 0.0001
L2 penalty (regularization term) parameter.
batch_size : int, optional, default 'auto'
Size of minibatches for stochastic optimizers.
If the algorithm is 'l-bfgs', the classifier will not use minibatch.
If the algorithm is 'lbfgs', the classifier will not use minibatch.
When set to "auto", `batch_size=min(200, n_samples)`
learning_rate : {'constant', 'invscaling', 'adaptive'}, default 'constant'
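Taken together, the changes in this file mean the new spelling is accepted while the old hyphenated one is rejected when hyperparameters are validated at fit time. A minimal sketch of the expected behaviour, using toy data; the exact error message may differ:

from sklearn.neural_network import MLPClassifier

X = [[0., 0.], [1., 1.]]
y = [0, 1]

# New spelling: accepted.
MLPClassifier(algorithm='lbfgs', hidden_layer_sizes=(5,), random_state=1).fit(X, y)

# Old spelling: rejected during hyperparameter validation inside fit.
try:
    MLPClassifier(algorithm='l-bfgs').fit(X, y)
except ValueError as exc:
    print(exc)  # e.g. "The algorithm l-bfgs is not supported. ..."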
18 changes: 9 additions & 9 deletions sklearn/neural_network/tests/test_mlp.py
@@ -179,7 +179,7 @@ def test_gradient():

for activation in ACTIVATION_TYPES:
mlp = MLPClassifier(activation=activation, hidden_layer_sizes=10,
algorithm='l-bfgs', alpha=1e-5,
algorithm='lbfgs', alpha=1e-5,
learning_rate_init=0.2, max_iter=1,
random_state=1)
mlp.fit(X, y)
@@ -238,7 +238,7 @@ def test_lbfgs_classification():
expected_shape_dtype = (X_test.shape[0], y_train.dtype.kind)

for activation in ACTIVATION_TYPES:
mlp = MLPClassifier(algorithm='l-bfgs', hidden_layer_sizes=50,
mlp = MLPClassifier(algorithm='lbfgs', hidden_layer_sizes=50,
max_iter=150, shuffle=True, random_state=1,
activation=activation)
mlp.fit(X_train, y_train)
@@ -253,7 +253,7 @@ def test_lbfgs_regression():
X = Xboston
y = yboston
for activation in ACTIVATION_TYPES:
mlp = MLPRegressor(algorithm='l-bfgs', hidden_layer_sizes=50,
mlp = MLPRegressor(algorithm='lbfgs', hidden_layer_sizes=50,
max_iter=150, shuffle=True, random_state=1,
activation=activation)
mlp.fit(X, y)
@@ -290,7 +290,7 @@ def test_multilabel_classification():
# test fit method
X, y = make_multilabel_classification(n_samples=50, random_state=0,
return_indicator=True)
mlp = MLPClassifier(algorithm='l-bfgs', hidden_layer_sizes=50, alpha=1e-5,
mlp = MLPClassifier(algorithm='lbfgs', hidden_layer_sizes=50, alpha=1e-5,
max_iter=150, random_state=0, activation='logistic',
learning_rate_init=0.2)
mlp.fit(X, y)
@@ -308,7 +308,7 @@ def test_multilabel_classification():
def test_multioutput_regression():
# Test that multi-output regression works as expected
X, y = make_regression(n_samples=200, n_targets=5)
mlp = MLPRegressor(algorithm='l-bfgs', hidden_layer_sizes=50, max_iter=200,
mlp = MLPRegressor(algorithm='lbfgs', hidden_layer_sizes=50, max_iter=200,
random_state=1)
mlp.fit(X, y)
assert_greater(mlp.score(X, y), 0.9)
@@ -383,8 +383,8 @@ def test_partial_fit_errors():
X, y,
classes=[2])

# l-bfgs doesn't support partial_fit
assert_false(hasattr(MLPClassifier(algorithm='l-bfgs'), 'partial_fit'))
# lbfgs doesn't support partial_fit
assert_false(hasattr(MLPClassifier(algorithm='lbfgs'), 'partial_fit'))


def test_params_errors():
@@ -466,7 +466,7 @@ def test_predict_proba_multilabel():
return_indicator=True)
n_samples, n_classes = Y.shape

clf = MLPClassifier(algorithm='l-bfgs', hidden_layer_sizes=30,
clf = MLPClassifier(algorithm='lbfgs', hidden_layer_sizes=30,
random_state=0)
clf.fit(X, Y)
y_proba = clf.predict_proba(X)
@@ -488,7 +488,7 @@ def test_sparse_matrices():
X = X_digits_binary[:50]
y = y_digits_binary[:50]
X_sparse = csr_matrix(X)
mlp = MLPClassifier(algorithm='l-bfgs', hidden_layer_sizes=15,
mlp = MLPClassifier(algorithm='lbfgs', hidden_layer_sizes=15,
random_state=1)
mlp.fit(X, y)
pred1 = mlp.predict(X)
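As test_partial_fit_errors checks, partial_fit is only exposed for the stochastic algorithms. A minimal sketch of what that means for user code; the 'sgd' case is an assumption based on the surrounding tests, not part of this diff:

from sklearn.neural_network import MLPClassifier

# With 'lbfgs' the attribute is not exposed at all (see the test above).
print(hasattr(MLPClassifier(algorithm='lbfgs'), 'partial_fit'))  # False
# With a stochastic algorithm such as 'sgd', partial_fit is expected to exist.
print(hasattr(MLPClassifier(algorithm='sgd'), 'partial_fit'))    # True (assumed)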
