Skip to content

Commit

Permalink
removed cross_val_score as it is not required
Browse files Browse the repository at this point in the history
  • Loading branch information
RJ Agrawal committed Jul 29, 2020
1 parent 22dc685 commit bb4113c
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 61 deletions.
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ Import
Import what you need from the ``sklearn_pandas`` package. The choices are:

* ``DataFrameMapper``, a class for mapping pandas data frame columns to different sklearn transformations
* ``cross_val_score``, similar to ``sklearn.cross_validation.cross_val_score`` but working on pandas DataFrames


For this demonstration, we will import it::

>>> from sklearn_pandas import DataFrameMapper, cross_val_score
>>> from sklearn_pandas import DataFrameMapper

For these examples, we'll also use pandas, numpy, and sklearn::

Expand Down
1 change: 0 additions & 1 deletion sklearn_pandas/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
__version__ = '2.0.0'

from .dataframe_mapper import DataFrameMapper # NOQA
from .cross_validation import cross_val_score, GridSearchCV, RandomizedSearchCV # NOQA
from .features_generator import gen_features # NOQA
from .transformers import NumericalTransformer # NOQA
56 changes: 0 additions & 56 deletions sklearn_pandas/cross_validation.py
Original file line number Diff line number Diff line change
@@ -1,59 +1,3 @@
import warnings
try:
from sklearn.model_selection import cross_val_score as sk_cross_val_score
from sklearn.model_selection import GridSearchCV as SKGridSearchCV
from sklearn.model_selection import RandomizedSearchCV as \
SKRandomizedSearchCV
except ImportError:
from sklearn.cross_validation import cross_val_score as sk_cross_val_score
from sklearn.grid_search import GridSearchCV as SKGridSearchCV
from sklearn.grid_search import RandomizedSearchCV as SKRandomizedSearchCV

# Message passed to warnings.warn (as a DeprecationWarning) by
# cross_val_score and by the GridSearchCV / RandomizedSearchCV constructors.
DEPRECATION_MSG = '''
Custom cross-validation compatibility shims are no longer needed for
scikit-learn>=0.16.0 and will be dropped in sklearn-pandas==2.0.
'''


def cross_val_score(model, X, *args, **kwargs):
    """
    Deprecated shim around scikit-learn's ``cross_val_score``.

    Issues a ``DeprecationWarning``, wraps ``X`` in a ``DataWrapper`` and
    forwards every other argument unchanged to ``sk_cross_val_score``.

    :param model: estimator handed to scikit-learn as-is.
    :param X: input data; wrapped in ``DataWrapper`` before the call.
    :param args: extra positional arguments, forwarded unchanged.
    :param kwargs: extra keyword arguments, forwarded unchanged.
    :return: whatever ``sk_cross_val_score`` returns.
    """
    # stacklevel=2 attributes the warning to the caller's line instead of
    # this module. Python's default filters only display a DeprecationWarning
    # attributed to __main__, so without it the warning is normally hidden
    # from exactly the users who need to migrate.
    warnings.warn(DEPRECATION_MSG, DeprecationWarning, stacklevel=2)
    return sk_cross_val_score(model, DataWrapper(X), *args, **kwargs)


class GridSearchCV(SKGridSearchCV):
    """
    Deprecated ``GridSearchCV`` shim that wraps its input in ``DataWrapper``.

    Only ``__init__``, ``fit`` and ``predict`` are overridden here; every
    other method is inherited from the scikit-learn class unchanged.
    """

    def __init__(self, *args, **kwargs):
        """Issue the deprecation warning, then defer to the parent class."""
        # stacklevel=2 attributes the warning to the line constructing the
        # search object rather than to this module, so Python's default
        # DeprecationWarning filter does not hide it from the caller.
        warnings.warn(DEPRECATION_MSG, DeprecationWarning, stacklevel=2)
        super(GridSearchCV, self).__init__(*args, **kwargs)

    def fit(self, X, *params, **kwparams):
        """Wrap ``X`` in ``DataWrapper`` and delegate to the parent ``fit``."""
        return super(GridSearchCV, self).fit(
            DataWrapper(X), *params, **kwparams)

    def predict(self, X, *params, **kwparams):
        """Wrap ``X`` in ``DataWrapper`` and delegate to the parent ``predict``."""
        return super(GridSearchCV, self).predict(
            DataWrapper(X), *params, **kwparams)


try:
    class RandomizedSearchCV(SKRandomizedSearchCV):
        """
        Deprecated ``RandomizedSearchCV`` shim that wraps its input in
        ``DataWrapper``.

        Only ``__init__``, ``fit`` and ``predict`` are overridden here; every
        other method is inherited from the scikit-learn class unchanged.
        """

        def __init__(self, *args, **kwargs):
            """Issue the deprecation warning, then defer to the parent class."""
            # stacklevel=2 attributes the warning to the line constructing
            # the search object rather than to this module, so Python's
            # default DeprecationWarning filter does not hide it.
            warnings.warn(DEPRECATION_MSG, DeprecationWarning, stacklevel=2)
            super(RandomizedSearchCV, self).__init__(*args, **kwargs)

        def fit(self, X, *params, **kwparams):
            """Wrap ``X`` in ``DataWrapper`` and delegate to the parent ``fit``."""
            return super(RandomizedSearchCV, self).fit(
                DataWrapper(X), *params, **kwparams)

        def predict(self, X, *params, **kwparams):
            """Wrap ``X`` in ``DataWrapper`` and delegate to the parent ``predict``."""
            return super(RandomizedSearchCV, self).predict(
                DataWrapper(X), *params, **kwparams)
except AttributeError:
    # Best-effort guard kept as-is: if defining the class raises
    # AttributeError, the module simply does not provide RandomizedSearchCV.
    # NOTE(review): nothing in the class body above looks able to raise
    # AttributeError; presumably this dates from an older import style --
    # confirm before removing.
    pass


class DataWrapper(object):

def __init__(self, df):
Expand Down
24 changes: 23 additions & 1 deletion tests/test_dataframe_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from scipy import sparse
from sklearn.datasets import load_iris
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction import DictVectorizer
Expand All @@ -27,7 +28,7 @@
from numpy.testing import assert_array_equal
import pickle

from sklearn_pandas import DataFrameMapper, cross_val_score
from sklearn_pandas import DataFrameMapper
from sklearn_pandas.dataframe_mapper import _handle_feature, _build_transformer
from sklearn_pandas.pipeline import TransformerPipeline

Expand Down Expand Up @@ -882,6 +883,27 @@ def test_with_car_dataframe(cars_dataframe):
assert scores.mean() > 0.30


def test_direct_cross_validation(iris_dataframe):
    """
    A pipeline starting with a DataFrameMapper can be cross-validated on a
    dataframe directly: with sklearn>=0.16.0 no CV wrappers are needed.
    See https://github.com/paulgb/sklearn-pandas/issues/11
    """
    # Every measurement column is passed through untransformed (None).
    feature_columns = (
        "petal length (cm)",
        "petal width (cm)",
        "sepal length (cm)",
        "sepal width (cm)",
    )
    mapper = DataFrameMapper([(column, None) for column in feature_columns])
    classifier = SVC(kernel='linear')
    pipeline = Pipeline([("preprocess", mapper), ("classify", classifier)])

    features = iris_dataframe.drop("species", axis=1)
    target = iris_dataframe["species"]
    scores = cross_val_score(pipeline, features, target)

    assert scores.mean() > 0.96
    assert (scores.std() * 2) < 0.04


def test_heterogeneous_output_types_input_df():
"""
Modify feat2, but pass feat1 through unmodified.
Expand Down
2 changes: 1 addition & 1 deletion tests/test_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import joblib

from sklearn_pandas import DataFrameMapper
from sklearn_pandas.transformers import NumericalTransformer
from sklearn_pandas import NumericalTransformer


@pytest.fixture
Expand Down

0 comments on commit bb4113c

Please sign in to comment.