MAINT Remove imports from sklearn.utils._joblib (scikit-learn#13676)
* Remove sklearn.utils._joblib imports

* Lint

* More fixes
rth authored and adrinjalali committed Jun 18, 2019
1 parent e2b6bff commit 1015caf
Showing 51 changed files with 121 additions and 133 deletions.
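
The change applied across these files is uniform: drop the `sklearn.utils._joblib` shim and import joblib's public API (`Parallel`, `delayed`, `effective_n_jobs`, `Memory`, `dump`, `load`) directly. Below is a minimal sketch of the new import style; the toy `double` function and the worker count are illustrative, not taken from the diff.

```python
# Sketch of the import style this commit moves to: joblib is used directly,
# not through the removed sklearn.utils._joblib shim.
from joblib import Parallel, delayed, effective_n_jobs


def double(x):
    # toy task; any picklable function works with delayed()
    return 2 * x


# effective_n_jobs(2) resolves the requested worker count (here, 2);
# Parallel dispatches the delayed calls and collects results in order.
results = Parallel(n_jobs=effective_n_jobs(2))(
    delayed(double)(i) for i in range(4))
print(results)  # [0, 2, 4, 6]
```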
4 changes: 1 addition & 3 deletions sklearn/cluster/k_means_.py
@@ -15,6 +15,7 @@

 import numpy as np
 import scipy.sparse as sp
+from joblib import Parallel, delayed, effective_n_jobs

 from ..base import BaseEstimator, ClusterMixin, TransformerMixin
 from ..metrics.pairwise import euclidean_distances
@@ -28,9 +29,6 @@
 from ..utils import check_random_state
 from ..utils.validation import check_is_fitted
 from ..utils.validation import FLOAT_DTYPES
-from ..utils._joblib import Parallel
-from ..utils._joblib import delayed
-from ..utils._joblib import effective_n_jobs
 from ..exceptions import ConvergenceWarning
 from . import _k_means
 from ._k_means_elkan import k_means_elkan

3 changes: 1 addition & 2 deletions sklearn/cluster/mean_shift_.py
@@ -16,15 +16,14 @@

 import numpy as np
 import warnings
+from joblib import Parallel, delayed

 from collections import defaultdict
 from ..utils.validation import check_is_fitted
 from ..utils import check_random_state, gen_batches, check_array
 from ..base import BaseEstimator, ClusterMixin
 from ..neighbors import NearestNeighbors
 from ..metrics.pairwise import pairwise_distances_argmin
-from ..utils._joblib import Parallel
-from ..utils._joblib import delayed


 def estimate_bandwidth(X, quantile=0.3, n_samples=None, random_state=0,

2 changes: 1 addition & 1 deletion sklearn/compose/_column_transformer.py
@@ -11,9 +11,9 @@

 import numpy as np
 from scipy import sparse
+from joblib import Parallel, delayed

 from ..base import clone, TransformerMixin
-from ..utils._joblib import Parallel, delayed
 from ..pipeline import _fit_transform_one, _transform_one, _name_estimators
 from ..preprocessing import FunctionTransformer
 from ..utils import Bunch

2 changes: 1 addition & 1 deletion sklearn/covariance/graph_lasso_.py
@@ -13,6 +13,7 @@

 import numpy as np
 from scipy import linalg
+from joblib import Parallel, delayed

 from .empirical_covariance_ import (empirical_covariance, EmpiricalCovariance,
                                     log_likelihood)
@@ -22,7 +23,6 @@
 from ..linear_model import cd_fast
 from ..linear_model import lars_path_gram
 from ..model_selection import check_cv, cross_val_score
-from ..utils._joblib import Parallel, delayed


 # Helper functions to compute the objective and dual objective functions

7 changes: 4 additions & 3 deletions sklearn/datasets/california_housing.py
@@ -28,12 +28,13 @@
 import numpy as np
 import logging

+import joblib
+
 from .base import get_data_home
 from .base import _fetch_remote
 from .base import _pkl_filepath
 from .base import RemoteFileMetadata
 from ..utils import Bunch
-from ..utils import _joblib

 # The original data can be found at:
 # https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.tgz
@@ -124,11 +125,11 @@ def fetch_california_housing(data_home=None, download_if_missing=True,
         columns_index = [8, 7, 2, 3, 4, 5, 6, 1, 0]
         cal_housing = cal_housing[:, columns_index]

-        _joblib.dump(cal_housing, filepath, compress=6)
+        joblib.dump(cal_housing, filepath, compress=6)
         remove(archive_path)

     else:
-        cal_housing = _joblib.load(filepath)
+        cal_housing = joblib.load(filepath)

     feature_names = ["MedInc", "HouseAge", "AveRooms", "AveBedrms",
                      "Population", "AveOccup", "Latitude", "Longitude"]

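The dataset fetchers touched in this commit all follow the same cache-on-disk pattern, now written against joblib directly. A minimal, self-contained sketch of that pattern follows; the path and the random array are stand-ins, not sklearn's real cache layout.

```python
# Cache-on-disk pattern used by the fetchers: dump the prepared array once,
# then reload it from the pickle on later calls. Paths here are hypothetical.
import os
import tempfile

import numpy as np
import joblib

filepath = os.path.join(tempfile.gettempdir(), "cal_housing_demo.pkl")

if not os.path.exists(filepath):
    data = np.random.rand(100, 8)             # stand-in for the downloaded data
    joblib.dump(data, filepath, compress=6)   # compress=6 trades CPU for size
else:
    data = joblib.load(filepath)

print(data.shape)  # (100, 8)
```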
10 changes: 5 additions & 5 deletions sklearn/datasets/covtype.py
@@ -20,13 +20,13 @@
 from os import remove, makedirs

 import numpy as np
+import joblib

 from .base import get_data_home
 from .base import _fetch_remote
 from .base import RemoteFileMetadata
 from ..utils import Bunch
 from .base import _pkl_filepath
-from ..utils import _joblib
 from ..utils import check_random_state

 # The original data can be found in:
@@ -117,16 +117,16 @@ def fetch_covtype(data_home=None, download_if_missing=True,
         X = Xy[:, :-1]
         y = Xy[:, -1].astype(np.int32, copy=False)

-        _joblib.dump(X, samples_path, compress=9)
-        _joblib.dump(y, targets_path, compress=9)
+        joblib.dump(X, samples_path, compress=9)
+        joblib.dump(y, targets_path, compress=9)

     elif not available and not download_if_missing:
         raise IOError("Data not found and `download_if_missing` is False")
     try:
         X, y
     except NameError:
-        X = _joblib.load(samples_path)
-        y = _joblib.load(targets_path)
+        X = joblib.load(samples_path)
+        y = joblib.load(targets_path)

     if shuffle:
         ind = np.arange(X.shape[0])

11 changes: 5 additions & 6 deletions sklearn/datasets/kddcup99.py
@@ -15,13 +15,12 @@
 from os.path import dirname, exists, join

 import numpy as np
-
+import joblib

 from .base import _fetch_remote
 from .base import get_data_home
 from .base import RemoteFileMetadata
 from ..utils import Bunch
-from ..utils import _joblib
 from ..utils import check_random_state
 from ..utils import shuffle as shuffle_method

@@ -284,17 +283,17 @@ def _fetch_brute_kddcup99(data_home=None,
         # (error: 'Incorrect data length while decompressing[...] the file
         # could be corrupted.')

-        _joblib.dump(X, samples_path, compress=0)
-        _joblib.dump(y, targets_path, compress=0)
+        joblib.dump(X, samples_path, compress=0)
+        joblib.dump(y, targets_path, compress=0)
     elif not available:
         if not download_if_missing:
             raise IOError("Data not found and `download_if_missing` is False")

     try:
         X, y
     except NameError:
-        X = _joblib.load(samples_path)
-        y = _joblib.load(targets_path)
+        X = joblib.load(samples_path)
+        y = joblib.load(targets_path)

     return Bunch(data=X, target=y)

8 changes: 4 additions & 4 deletions sklearn/datasets/lfw.py
@@ -15,11 +15,11 @@
 from distutils.version import LooseVersion

 import numpy as np
+import joblib
+from joblib import Memory

 from .base import get_data_home, _fetch_remote, RemoteFileMetadata
 from ..utils import Bunch
-from ..utils._joblib import Memory
-from ..utils import _joblib

 logger = logging.getLogger(__name__)

@@ -303,7 +303,7 @@ def fetch_lfw_people(data_home=None, funneled=True, resize=0.5,

     # wrap the loader in a memoizing function that will return memmaped data
     # arrays for optimal memory usage
-    if LooseVersion(_joblib.__version__) < LooseVersion('0.12'):
+    if LooseVersion(joblib.__version__) < LooseVersion('0.12'):
         # Deal with change of API in joblib
         m = Memory(cachedir=lfw_home, compress=6, verbose=0)
     else:
@@ -474,7 +474,7 @@ def fetch_lfw_pairs(subset='train', data_home=None, funneled=True, resize=0.5,

     # wrap the loader in a memoizing function that will return memmaped data
     # arrays for optimal memory usage
-    if LooseVersion(_joblib.__version__) < LooseVersion('0.12'):
+    if LooseVersion(joblib.__version__) < LooseVersion('0.12'):
         # Deal with change of API in joblib
         m = Memory(cachedir=lfw_home, compress=6, verbose=0)
     else:

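lfw.py keeps a version guard around `joblib.Memory` because the constructor keyword changed in joblib 0.12. The `else` branch is not visible in the hunks above; the `location=` call in the sketch below is an assumption based on joblib's documented API, and the cache directory is a temporary stand-in for `lfw_home`.

```python
# Version guard as in lfw.py: joblib < 0.12 expects `cachedir`, newer
# releases expect `location`. The else branch is assumed, not shown above.
from distutils.version import LooseVersion
import tempfile

import joblib
from joblib import Memory

cache_home = tempfile.mkdtemp()  # stand-in for lfw_home

if LooseVersion(joblib.__version__) < LooseVersion('0.12'):
    m = Memory(cachedir=cache_home, compress=6, verbose=0)
else:
    m = Memory(location=cache_home, compress=6, verbose=0)


@m.cache
def expensive_load(n):
    # stand-in for the image loader that gets memoized in lfw.py
    return list(range(n))


print(len(expensive_load(5)))  # 5; a second call is served from the cache
```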
6 changes: 3 additions & 3 deletions sklearn/datasets/olivetti_faces.py
@@ -18,12 +18,12 @@

 import numpy as np
 from scipy.io.matlab import loadmat
+import joblib

 from .base import get_data_home
 from .base import _fetch_remote
 from .base import RemoteFileMetadata
 from .base import _pkl_filepath
-from ..utils import _joblib
 from ..utils import check_random_state, Bunch

 # The original data can be found at:
@@ -104,10 +104,10 @@ def fetch_olivetti_faces(data_home=None, shuffle=False, random_state=0,
         remove(mat_path)

         faces = mfile['faces'].T.copy()
-        _joblib.dump(faces, filepath, compress=6)
+        joblib.dump(faces, filepath, compress=6)
         del mfile
     else:
-        faces = _joblib.load(filepath)
+        faces = joblib.load(filepath)

     # We want floating point data, but float32 is enough (there is only
     # one byte of precision in the original uint8s anyway)

18 changes: 9 additions & 9 deletions sklearn/datasets/rcv1.py
@@ -16,12 +16,12 @@

 import numpy as np
 import scipy.sparse as sp
+import joblib

 from .base import get_data_home
 from .base import _pkl_filepath
 from .base import _fetch_remote
 from .base import RemoteFileMetadata
-from ..utils import _joblib
 from .svmlight_format import load_svmlight_files
 from ..utils import shuffle as shuffle_
 from ..utils import Bunch
@@ -181,16 +181,16 @@ def fetch_rcv1(data_home=None, subset='all', download_if_missing=True,
         sample_id = np.hstack((Xy[9], Xy[1], Xy[3], Xy[5], Xy[7]))
         sample_id = sample_id.astype(np.uint32, copy=False)

-        _joblib.dump(X, samples_path, compress=9)
-        _joblib.dump(sample_id, sample_id_path, compress=9)
+        joblib.dump(X, samples_path, compress=9)
+        joblib.dump(sample_id, sample_id_path, compress=9)

         # delete archives
         for f in files:
             f.close()
             remove(f.name)
     else:
-        X = _joblib.load(samples_path)
-        sample_id = _joblib.load(sample_id_path)
+        X = joblib.load(samples_path)
+        sample_id = joblib.load(sample_id_path)

     # load target (y), categories, and sample_id_bis
     if download_if_missing and (not exists(sample_topics_path) or
@@ -240,11 +240,11 @@ def fetch_rcv1(data_home=None, subset='all', download_if_missing=True,
         categories = categories[order]
         y = sp.csr_matrix(y[:, order])

-        _joblib.dump(y, sample_topics_path, compress=9)
-        _joblib.dump(categories, topics_path, compress=9)
+        joblib.dump(y, sample_topics_path, compress=9)
+        joblib.dump(categories, topics_path, compress=9)
     else:
-        y = _joblib.load(sample_topics_path)
-        categories = _joblib.load(topics_path)
+        y = joblib.load(sample_topics_path)
+        categories = joblib.load(topics_path)

     if subset == 'all':
         pass

8 changes: 4 additions & 4 deletions sklearn/datasets/species_distributions.py
@@ -41,16 +41,16 @@
 from os import makedirs, remove
 from os.path import exists

-
 import logging
 import numpy as np

+import joblib
+
 from .base import get_data_home
 from .base import _fetch_remote
 from .base import RemoteFileMetadata
 from ..utils import Bunch
 from .base import _pkl_filepath
-from ..utils import _joblib

 # The original data can be found at:
 # https://biodiversityinformatics.amnh.org/open_source/maxent/samples.zip
@@ -257,8 +257,8 @@ def fetch_species_distributions(data_home=None,
                       test=test,
                       train=train,
                       **extra_params)
-        _joblib.dump(bunch, archive_path, compress=9)
+        joblib.dump(bunch, archive_path, compress=9)
     else:
-        bunch = _joblib.load(archive_path)
+        bunch = joblib.load(archive_path)

     return bunch

6 changes: 3 additions & 3 deletions sklearn/datasets/twenty_newsgroups.py
@@ -35,6 +35,7 @@

 import numpy as np
 import scipy.sparse as sp
+import joblib

 from .base import get_data_home
 from .base import load_files
@@ -43,7 +44,6 @@
 from .base import RemoteFileMetadata
 from ..feature_extraction.text import CountVectorizer
 from ..preprocessing import normalize
-from ..utils import _joblib
 from ..utils import check_random_state, Bunch

 logger = logging.getLogger(__name__)

@@ -398,12 +398,12 @@ def fetch_20newsgroups_vectorized(subset="train", remove=(), data_home=None,
                                    download_if_missing=download_if_missing)

     if os.path.exists(target_file):
-        X_train, X_test = _joblib.load(target_file)
+        X_train, X_test = joblib.load(target_file)
     else:
         vectorizer = CountVectorizer(dtype=np.int16)
         X_train = vectorizer.fit_transform(data_train.data).tocsr()
         X_test = vectorizer.transform(data_test.data).tocsr()
-        _joblib.dump((X_train, X_test), target_file, compress=9)
+        joblib.dump((X_train, X_test), target_file, compress=9)

     # the data is stored as int16 for compactness
     # but normalize needs floats

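The 20newsgroups hunk caches a pair of sparse term-count matrices in a single pickle. A short sketch of that dump/load-a-tuple pattern follows; the matrices are random stand-ins and the path is hypothetical.

```python
# Dump a (train, test) pair of sparse matrices as one joblib pickle and read
# them back with tuple unpacking, as in the hunk above. Path is hypothetical.
import os
import tempfile

import scipy.sparse as sp
import joblib

target_file = os.path.join(tempfile.gettempdir(), "newsgroups_demo.pkl")

if os.path.exists(target_file):
    X_train, X_test = joblib.load(target_file)
else:
    X_train = sp.random(50, 20, density=0.1, format='csr')  # stand-in matrices
    X_test = sp.random(10, 20, density=0.1, format='csr')
    joblib.dump((X_train, X_test), target_file, compress=9)

print(X_train.shape, X_test.shape)  # (50, 20) (10, 20)
```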
2 changes: 1 addition & 1 deletion sklearn/decomposition/dict_learning.py
@@ -11,9 +11,9 @@

 import numpy as np
 from scipy import linalg
+from joblib import Parallel, delayed, effective_n_jobs

 from ..base import BaseEstimator, TransformerMixin
-from ..utils._joblib import Parallel, delayed, effective_n_jobs
 from ..utils import (check_array, check_random_state, gen_even_slices,
                      gen_batches)
 from ..utils.extmath import randomized_svd, row_norms

2 changes: 1 addition & 1 deletion sklearn/decomposition/online_lda.py
@@ -14,14 +14,14 @@
 import numpy as np
 import scipy.sparse as sp
 from scipy.special import gammaln
+from joblib import Parallel, delayed, effective_n_jobs

 from ..base import BaseEstimator, TransformerMixin
 from ..utils import (check_random_state, check_array,
                      gen_batches, gen_even_slices)
 from ..utils.fixes import logsumexp
 from ..utils.validation import check_non_negative
 from ..utils.validation import check_is_fitted
-from ..utils._joblib import Parallel, delayed, effective_n_jobs

 from ._online_lda import (mean_change, _dirichlet_expectation_1d,
                           _dirichlet_expectation_2d)

8 changes: 4 additions & 4 deletions sklearn/decomposition/tests/test_sparse_pca.py
@@ -142,15 +142,15 @@ def test_mini_batch_fit_transform():
     U1 = spca_lars.transform(Y)
     # Test multiple CPUs
     if sys.platform == 'win32':  # fake parallelism for win32
-        import sklearn.utils._joblib.parallel as joblib_par
-        _mp = joblib_par.multiprocessing
-        joblib_par.multiprocessing = None
+        import joblib
+        _mp = joblib.parallel.multiprocessing
+        joblib.parallel.multiprocessing = None
         try:
             spca = MiniBatchSparsePCA(n_components=3, n_jobs=2, alpha=alpha,
                                       random_state=0)
             U2 = spca.fit(Y).transform(Y)
         finally:
-            joblib_par.multiprocessing = _mp
+            joblib.parallel.multiprocessing = _mp
     else:  # we can efficiently use parallelism
         spca = MiniBatchSparsePCA(n_components=3, n_jobs=2, alpha=alpha,
                                   random_state=0)

3 changes: 2 additions & 1 deletion sklearn/ensemble/bagging.py
@@ -10,9 +10,10 @@
 from abc import ABCMeta, abstractmethod
 from warnings import warn

+from joblib import Parallel, delayed
+
 from .base import BaseEnsemble, _partition_estimators
 from ..base import ClassifierMixin, RegressorMixin
-from ..utils._joblib import Parallel, delayed
 from ..metrics import r2_score, accuracy_score
 from ..tree import DecisionTreeClassifier, DecisionTreeRegressor
 from ..utils import check_random_state, check_X_y, check_array, column_or_1d
