diff --git a/doc/datasets/index.rst b/doc/datasets/index.rst
index 18c66b8975cbc..96ea729f8163e 100644
--- a/doc/datasets/index.rst
+++ b/doc/datasets/index.rst
@@ -3,7 +3,7 @@
     >>> import numpy as np
     >>> import os
-    >>> from scikits.learn import datasets
+    >>> from sklearn import datasets
     >>> datasets.mldata.urllib2 = mock_urllib2

 .. _datasets:

@@ -12,9 +12,9 @@ Dataset loading utilities
 =========================

-.. currentmodule:: scikits.learn.datasets
+.. currentmodule:: sklearn.datasets

-The ``scikits.learn.datasets`` package embeds some small toy datasets
+The ``sklearn.datasets`` package embeds some small toy datasets
 as introduced in the "Getting Started" section.

 To evaluate the impact of the scale of the dataset (``n_samples`` and
@@ -108,7 +108,7 @@ Scipy sparse CSR matrices are used for ``X`` and numpy arrays are used for ``y``
 You may load a dataset like this::

-  >>> from scikits.learn.datasets import load_svmlight_file
+  >>> from sklearn.datasets import load_svmlight_file
   >>> X_train, y_train = load_svmlight_file("/path/to/train_dataset.txt")
   ... # doctest: +SKIP
diff --git a/doc/datasets/labeled_faces.rst b/doc/datasets/labeled_faces.rst
index 89673b4cd20e2..7f86da507ca92 100644
--- a/doc/datasets/labeled_faces.rst
+++ b/doc/datasets/labeled_faces.rst
@@ -39,7 +39,7 @@ less than 200ms by using a memmapped version memoized on the disk in the
 The first loader is used for the Face Identification task: a multi-class
 classification task (hence supervised learning)::

-  >>> from scikits.learn.datasets import fetch_lfw_people
+  >>> from sklearn.datasets import fetch_lfw_people
   >>> lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

   >>> for name in lfw_people.target_names:
@@ -74,7 +74,7 @@ array::
 The second loader is typically used for the face verification task: each
 sample is a pair of two pictures belonging or not to the same person::

-  >>> from scikits.learn.datasets import fetch_lfw_pairs
+  >>> from sklearn.datasets import fetch_lfw_pairs
   >>> lfw_pairs_train = fetch_lfw_pairs(subset='train')

   >>> list(lfw_pairs_train.target_names)
diff --git a/doc/datasets/labeled_faces_fixture.py b/doc/datasets/labeled_faces_fixture.py
index ac15044f1caa7..0d13c8ddd80a9 100644
--- a/doc/datasets/labeled_faces_fixture.py
+++ b/doc/datasets/labeled_faces_fixture.py
@@ -6,7 +6,7 @@
 from os.path import exists
 from os.path import join
 from nose import SkipTest
-from scikits.learn.datasets import get_data_home
+from sklearn.datasets import get_data_home


 def setup_module(module):
diff --git a/doc/datasets/mldata.rst b/doc/datasets/mldata.rst
index 12d824b2a2a1d..65b10b3013787 100644
--- a/doc/datasets/mldata.rst
+++ b/doc/datasets/mldata.rst
@@ -4,12 +4,12 @@ Downloading datasets from the mldata.org repository
 `mldata.org <http://mldata.org>`_ is a public repository for machine learning
 data, supported by the `PASCAL network <http://www.pascal-network.org>`_.

-The ``scikits.learn.datasets`` package is able to directly download data
+The ``sklearn.datasets`` package is able to directly download data
 sets from the repository using the function ``fetch_mldata(dataname)``.

 For example, to download the MNIST digit recognition database::

-  >>> from scikits.learn.datasets import fetch_mldata
+  >>> from sklearn.datasets import fetch_mldata
   >>> mnist = fetch_mldata('MNIST original', data_home=custom_data_home)

 The MNIST database contains a total of 70000 examples of handwritten digits
@@ -36,7 +36,7 @@ datasets:
 * The data arrays in `mldata.org <http://mldata.org>`_ are most often
   shaped as ``(n_features, n_samples)``. This is the opposite of the
-  ``scikits.learn`` convention, so ``fetch_mldata`` transposes the matrix
+  ``scikit-learn`` convention, so ``fetch_mldata`` transposes the matrix
   by default. The ``transpose_data`` keyword controls this behavior::

     >>> iris = fetch_mldata('iris', data_home=custom_data_home)
diff --git a/doc/datasets/mldata_fixture.py b/doc/datasets/mldata_fixture.py
index 192daa4af5598..2267288c38fb5 100644
--- a/doc/datasets/mldata_fixture.py
+++ b/doc/datasets/mldata_fixture.py
@@ -5,8 +5,8 @@
 from os import makedirs
 from os.path import join
-from scikits.learn import datasets
-from scikits.learn.utils.testing import mock_urllib2
+from sklearn import datasets
+from sklearn.utils.testing import mock_urllib2
 import tempfile
 import scipy as sp
 import shutil
diff --git a/doc/datasets/twenty_newsgroups.rst b/doc/datasets/twenty_newsgroups.rst
index 13fd2f1955b80..c4fd379e2111a 100644
--- a/doc/datasets/twenty_newsgroups.rst
+++ b/doc/datasets/twenty_newsgroups.rst
@@ -13,21 +13,21 @@ provides a version where the data is already vectorized.
 This is not the case for this loader. Instead, it returns the list of
 the raw text files that can be fed to text feature extractors such as
-:class:`scikits.learn.feature_extraction.text.Vectorizer` with custom
+:class:`sklearn.feature_extraction.text.Vectorizer` with custom
 parameters so as to extract feature vectors.

 Usage
 -----

-The ``scikits.learn.datasets.fetch_20newsgroups`` function is a data
+The ``sklearn.datasets.fetch_20newsgroups`` function is a data
 fetching / caching function that downloads the data archive from
 the original `20 newsgroups website`_, extracts the archive contents in
 the ``~/scikit_learn_data/20news_home`` folder and calls the
-``scikits.learn.datasets.load_file`` on either the training or
+``sklearn.datasets.load_file`` on either the training or
 testing set folder, or both of them::

-  >>> from scikits.learn.datasets import fetch_20newsgroups
+  >>> from sklearn.datasets import fetch_20newsgroups
   >>> newsgroups_train = fetch_20newsgroups(subset='train')

   >>> from pprint import pprint
@@ -81,11 +81,11 @@ list of the categories to load to the ``fetch_20newsgroups`` function::
 In order to feed predictive or clustering models with the text data, one
 first needs to turn the text into vectors of numerical values suitable for
 statistical analysis. This can be achieved with the utilities of
-``scikits.learn.feature_extraction.text`` as demonstrated in the following
+``sklearn.feature_extraction.text`` as demonstrated in the following
 example that extracts `TF-IDF`_ vectors of unigram tokens::

-  >>> from scikits.learn.feature_extraction.text import Vectorizer
+  >>> from sklearn.feature_extraction.text import Vectorizer
   >>> documents = [open(f).read() for f in newsgroups_train.filenames]
   >>> vectorizer = Vectorizer()
   >>> vectors = vectorizer.fit_transform(documents)
diff --git a/doc/developers/performance.rst b/doc/developers/performance.rst
index 250e0e82df0ac..d962348e6d9cb 100644
--- a/doc/developers/performance.rst
+++ b/doc/developers/performance.rst
@@ -26,7 +26,7 @@ code for the scikit-learn project.
 Python, Cython or C/C++?
 ========================

-.. currentmodule:: scikits.learn
+.. currentmodule:: sklearn

 In general, the scikit-learn project emphasizes the **readability** of
 the source code to make it easy for the project users to dive into the
@@ -89,9 +89,9 @@ Suppose we want to profile the Non Negative Matrix Factorization module
 of the scikit. Let us set up a new IPython session and load the digits
 dataset as in the :ref:`example_decomposition_plot_nmf.py` example::

-  In [1]: from scikits.learn.decomposition import NMF
+  In [1]: from sklearn.decomposition import NMF

-  In [2]: from scikits.learn.datasets import load_digits
+  In [2]: from sklearn.datasets import load_digits

   In [3]: X = load_digits().data
@@ -188,16 +188,16 @@ Towards the end of the file, define the ``%lprun`` magic::
 Now restart IPython and let us use this new toy::

-  In [1]: from scikits.learn.datasets import load_digits
+  In [1]: from sklearn.datasets import load_digits

-  In [2]: from scikits.learn.decomposition.nmf import _nls_subproblem, NMF
+  In [2]: from sklearn.decomposition.nmf import _nls_subproblem, NMF

   In [3]: X = load_digits().data

   In [4]: %lprun -f _nls_subproblem NMF(n_components=16, tol=1e-2).fit(X)
   Timer unit: 1e-06 s

-  File: scikits/learn/decomposition/nmf.py
+  File: sklearn/decomposition/nmf.py
   Function: _nls_subproblem at line 137
   Total time: 1.73153 s
diff --git a/examples/cluster/README.txt b/examples/cluster/README.txt
index 767b917d2bb63..1b38bab9cbb28 100644
--- a/examples/cluster/README.txt
+++ b/examples/cluster/README.txt
@@ -3,5 +3,5 @@
 Clustering
 ----------

-Examples concerning the `scikits.learn.cluster` package.
+Examples concerning the `sklearn.cluster` package.
diff --git a/examples/covariance/README.txt b/examples/covariance/README.txt
index 5160f8bb618cc..0767f1031d8b0 100644
--- a/examples/covariance/README.txt
+++ b/examples/covariance/README.txt
@@ -1,4 +1,4 @@
 Covariance estimation
 ---------------------

-Examples concerning the `scikits.learn.covariance` package.
+Examples concerning the `sklearn.covariance` package.
diff --git a/examples/decomposition/README.txt b/examples/decomposition/README.txt
index c2bd41efe01f2..b5f710c810f77 100644
--- a/examples/decomposition/README.txt
+++ b/examples/decomposition/README.txt
@@ -3,5 +3,5 @@
 Decomposition
 -------------

-Examples concerning the `scikits.learn.decomposition` package.
+Examples concerning the `sklearn.decomposition` package.
diff --git a/examples/gaussian_process/README.txt b/examples/gaussian_process/README.txt
index c749e7a7e9dc9..216660e8acfe3 100644
--- a/examples/gaussian_process/README.txt
+++ b/examples/gaussian_process/README.txt
@@ -3,5 +3,5 @@
 Gaussian Process for Machine Learning
 -------------------------------------

-Examples concerning the `scikits.learn.gaussian_process` package.
+Examples concerning the `sklearn.gaussian_process` package.
diff --git a/examples/gaussian_process/gp_diabetes_dataset.py b/examples/gaussian_process/gp_diabetes_dataset.py
index f3d1e46cdf041..fbf0b791b6c03 100644
--- a/examples/gaussian_process/gp_diabetes_dataset.py
+++ b/examples/gaussian_process/gp_diabetes_dataset.py
@@ -27,7 +27,7 @@
 from sklearn.gaussian_process import GaussianProcess
 from sklearn.cross_val import cross_val_score, KFold

-# Load the dataset from scikits' data sets
+# Load the dataset from scikit-learn's datasets
 diabetes = datasets.load_diabetes()
 X, y = diabetes.data, diabetes.target
diff --git a/examples/linear_model/README.txt b/examples/linear_model/README.txt
index 77439a5aa15ea..d70d3bed9d2bf 100644
--- a/examples/linear_model/README.txt
+++ b/examples/linear_model/README.txt
@@ -2,4 +2,4 @@
 Generalized Linear Models
 -------------------------

-Examples concerning the `scikits.learn.linear_model` package.
+Examples concerning the `sklearn.linear_model` package.
diff --git a/examples/manifold/README.txt b/examples/manifold/README.txt
index 42a5dfcca72fe..13a9ed3bbe9ca 100644
--- a/examples/manifold/README.txt
+++ b/examples/manifold/README.txt
@@ -3,5 +3,5 @@
 Manifold learning
 -----------------------

-Examples concerning the `scikits.learn.manifold` package.
+Examples concerning the `sklearn.manifold` package.
diff --git a/examples/manifold/plot_lle_digits.py.prof b/examples/manifold/plot_lle_digits.py.prof
deleted file mode 100644
index 2d30366716f99..0000000000000
Binary files a/examples/manifold/plot_lle_digits.py.prof and /dev/null differ
diff --git a/examples/mixture/README.txt b/examples/mixture/README.txt
index bbf508e59d26d..1cc9671e40150 100644
--- a/examples/mixture/README.txt
+++ b/examples/mixture/README.txt
@@ -2,4 +2,4 @@
 Gaussian Mixture Models
 -----------------------

-Examples concerning the `scikits.learn.mixture` package.
+Examples concerning the `sklearn.mixture` package.
diff --git a/examples/svm/README.txt b/examples/svm/README.txt
index 9c83e641b5b68..f9f3b57afc456 100644
--- a/examples/svm/README.txt
+++ b/examples/svm/README.txt
@@ -3,5 +3,5 @@
 Support Vector Machines
 -----------------------

-Examples concerning the `scikits.learn.svm` package.
+Examples concerning the `sklearn.svm` package.
diff --git a/setup.py b/setup.py
index af79845f6e57b..1197d029ac5a3 100644
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,7 @@
 import os
 import shutil

-DISTNAME = 'sklearn'
+DISTNAME = 'scikit-learn'
 DESCRIPTION = 'A set of python modules for machine learning and data mining'
 LONG_DESCRIPTION = open('README.rst').read()
 MAINTAINER = 'Fabian Pedregosa'
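The common thread of the documentation and example hunks above is a pure
namespace rename: every ``scikits.learn`` import becomes a ``sklearn``
import, with the module layout otherwise unchanged. A minimal doctest-style
sketch of the new spelling, reusing the digits loader that the profiling
session above already imports (the shape shown is the standard digits
dataset dimensions)::

  >>> # was: from scikits.learn.datasets import load_digits
  >>> from sklearn.datasets import load_digits
  >>> X = load_digits().data
  >>> X.shape
  (1797, 64)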
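The ``transpose_data`` behavior referenced in the ``mldata.rst`` hunk can be
made concrete with a short sketch. This assumes the mldata.org ``'iris'``
dataset and the same writable ``custom_data_home`` used by the surrounding
doctests::

  >>> from sklearn.datasets import fetch_mldata
  >>> iris = fetch_mldata('iris', data_home=custom_data_home)
  >>> iris.data.shape        # transposed to (n_samples, n_features) by default
  (150, 4)
  >>> iris = fetch_mldata('iris', transpose_data=False,
  ...                     data_home=custom_data_home)
  >>> iris.data.shape        # raw mldata.org orientation
  (4, 150)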
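The ``setup.py`` hunk goes in the opposite direction from all the others:
the distribution name becomes ``scikit-learn`` while the importable package
stays ``sklearn``, so the install name and the import name deliberately
diverge. A sketch of the resulting split, assuming the renamed distribution
is installed (``pkg_resources`` is just one setuptools-era way to read back
the installed distribution name)::

  >>> import sklearn          # the import name is unchanged
  >>> import pkg_resources
  >>> pkg_resources.get_distribution('scikit-learn').project_name  # install name
  'scikit-learn'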