Skip to content

Commit

Permalink
[SPARK-13008][ML][PYTHON] Put one algorithm per line in pyspark.ml `__all__` lists
Browse files Browse the repository at this point in the history
This is to fix a long-time annoyance: whenever we add a new algorithm to pyspark.ml, we have to add it to the `__all__` list at the top of the module. Since we keep that list alphabetized, adding one name often reflows many lines and creates a much larger diff than needed. It is also easy to add the Estimator and forget the Model. I'm going to switch the lists to have one algorithm (the Estimator together with its Model, if any) per line.

This also alphabetizes a few out-of-place classes in pyspark.ml.feature.  No changes have been made to the moved classes.

CC: thunterdb

Author: Joseph K. Bradley <[email protected]>

Closes apache#10927 from jkbradley/ml-python-all-list.
  • Loading branch information
jkbradley authored and mengxr committed Mar 2, 2016
1 parent e42724b commit 9495c40
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 15 deletions.
11 changes: 6 additions & 5 deletions python/pyspark/ml/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,12 @@
from pyspark.mllib.common import inherit_doc


__all__ = ['LogisticRegression', 'LogisticRegressionModel', 'DecisionTreeClassifier',
'DecisionTreeClassificationModel', 'GBTClassifier', 'GBTClassificationModel',
'RandomForestClassifier', 'RandomForestClassificationModel', 'NaiveBayes',
'NaiveBayesModel', 'MultilayerPerceptronClassifier',
'MultilayerPerceptronClassificationModel']
__all__ = ['LogisticRegression', 'LogisticRegressionModel',
'DecisionTreeClassifier', 'DecisionTreeClassificationModel',
'GBTClassifier', 'GBTClassificationModel',
'RandomForestClassifier', 'RandomForestClassificationModel',
'NaiveBayes', 'NaiveBayesModel',
'MultilayerPerceptronClassifier', 'MultilayerPerceptronClassificationModel']


@inherit_doc
Expand Down
3 changes: 2 additions & 1 deletion python/pyspark/ml/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
from pyspark.ml.param.shared import *
from pyspark.mllib.common import inherit_doc

__all__ = ['KMeans', 'KMeansModel', 'BisectingKMeans', 'BisectingKMeansModel']
__all__ = ['BisectingKMeans', 'BisectingKMeansModel',
'KMeans', 'KMeansModel']


class KMeansModel(JavaModel, MLWritable, MLReadable):
Expand Down
37 changes: 28 additions & 9 deletions python/pyspark/ml/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,34 @@
from pyspark.mllib.common import inherit_doc
from pyspark.mllib.linalg import _convert_to_vector

__all__ = ['Binarizer', 'Bucketizer', 'CountVectorizer', 'CountVectorizerModel', 'DCT',
'ElementwiseProduct', 'HashingTF', 'IDF', 'IDFModel', 'IndexToString',
'MaxAbsScaler', 'MaxAbsScalerModel', 'MinMaxScaler', 'MinMaxScalerModel',
'NGram', 'Normalizer', 'OneHotEncoder', 'PCA', 'PCAModel', 'PolynomialExpansion',
'QuantileDiscretizer', 'RegexTokenizer', 'RFormula', 'RFormulaModel',
'SQLTransformer', 'StandardScaler', 'StandardScalerModel', 'StopWordsRemover',
'StringIndexer', 'StringIndexerModel', 'Tokenizer', 'VectorAssembler',
'VectorIndexer', 'VectorSlicer', 'Word2Vec', 'Word2VecModel', 'ChiSqSelector',
'ChiSqSelectorModel']
__all__ = ['Binarizer',
'Bucketizer',
'ChiSqSelector', 'ChiSqSelectorModel',
'CountVectorizer', 'CountVectorizerModel',
'DCT',
'ElementwiseProduct',
'HashingTF',
'IDF', 'IDFModel',
'IndexToString',
'MaxAbsScaler', 'MaxAbsScalerModel',
'MinMaxScaler', 'MinMaxScalerModel',
'NGram',
'Normalizer',
'OneHotEncoder',
'PCA', 'PCAModel',
'PolynomialExpansion',
'QuantileDiscretizer',
'RegexTokenizer',
'RFormula', 'RFormulaModel',
'SQLTransformer',
'StandardScaler', 'StandardScalerModel',
'StopWordsRemover',
'StringIndexer', 'StringIndexerModel',
'Tokenizer',
'VectorAssembler',
'VectorIndexer', 'VectorIndexerModel',
'VectorSlicer',
'Word2Vec', 'Word2VecModel']


@inherit_doc
Expand Down

0 comments on commit 9495c40

Please sign in to comment.