diff --git a/doc/datasets/index.rst b/doc/datasets/index.rst
index 18c66b8975cbc..96ea729f8163e 100644
--- a/doc/datasets/index.rst
+++ b/doc/datasets/index.rst
@@ -3,7 +3,7 @@
     >>> import numpy as np
     >>> import os
-    >>> from scikits.learn import datasets
+    >>> from sklearn import datasets
     >>> datasets.mldata.urllib2 = mock_urllib2

 .. _datasets:

@@ -12,9 +12,9 @@ Dataset loading utilities
 =========================

-.. currentmodule:: scikits.learn.datasets
+.. currentmodule:: sklearn.datasets

-The ``scikits.learn.datasets`` package embeds some small toy datasets
+The ``sklearn.datasets`` package embeds some small toy datasets
 as introduced in the "Getting Started" section.

 To evaluate the impact of the scale of the dataset (``n_samples`` and
@@ -108,7 +108,7 @@ Scipy sparse CSR matrices are used for ``X`` and numpy arrays are used for ``y``
 You may load a dataset like this::

-  >>> from scikits.learn.datasets import load_svmlight_file
+  >>> from sklearn.datasets import load_svmlight_file
   >>> X_train, y_train = load_svmlight_file("/path/to/train_dataset.txt")
   ... # doctest: +SKIP
diff --git a/doc/datasets/labeled_faces.rst b/doc/datasets/labeled_faces.rst
index 89673b4cd20e2..7f86da507ca92 100644
--- a/doc/datasets/labeled_faces.rst
+++ b/doc/datasets/labeled_faces.rst
@@ -39,7 +39,7 @@ less than 200ms by using a memmapped version memoized on the disk in the
 The first loader is used for the Face Identification task: a multi-class
 classification task (hence supervised learning)::

-  >>> from scikits.learn.datasets import fetch_lfw_people
+  >>> from sklearn.datasets import fetch_lfw_people
   >>> lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

   >>> for name in lfw_people.target_names:
@@ -74,7 +74,7 @@ array::
 The second loader is typically used for the face verification task: each
 sample is a pair of two pictures belonging or not to the same person::

-  >>> from scikits.learn.datasets import fetch_lfw_pairs
+  >>> from sklearn.datasets import fetch_lfw_pairs
   >>> lfw_pairs_train = fetch_lfw_pairs(subset='train')

   >>> list(lfw_pairs_train.target_names)
diff --git a/doc/datasets/labeled_faces_fixture.py b/doc/datasets/labeled_faces_fixture.py
index ac15044f1caa7..0d13c8ddd80a9 100644
--- a/doc/datasets/labeled_faces_fixture.py
+++ b/doc/datasets/labeled_faces_fixture.py
@@ -6,7 +6,7 @@
 from os.path import exists
 from os.path import join
 from nose import SkipTest
-from scikits.learn.datasets import get_data_home
+from sklearn.datasets import get_data_home


 def setup_module(module):
diff --git a/doc/datasets/mldata.rst b/doc/datasets/mldata.rst
index 12d824b2a2a1d..65b10b3013787 100644
--- a/doc/datasets/mldata.rst
+++ b/doc/datasets/mldata.rst
@@ -4,12 +4,12 @@ Downloading datasets from the mldata.org repository
 `mldata.org <http://mldata.org>`_ is a public repository for machine learning
 data, supported by the `PASCAL network <http://www.pascal-network.org>`_.

-The ``scikits.learn.datasets`` package is able to directly download data
+The ``sklearn.datasets`` package is able to directly download data
 sets from the repository using the function ``fetch_mldata(dataname)``.

 For example, to download the MNIST digit recognition database::

-  >>> from scikits.learn.datasets import fetch_mldata
+  >>> from sklearn.datasets import fetch_mldata
   >>> mnist = fetch_mldata('MNIST original', data_home=custom_data_home)

 The MNIST database contains a total of 70000 examples of handwritten digits
@@ -36,7 +36,7 @@ datasets:
 * The data arrays in `mldata.org <http://mldata.org>`_ are most often
   shaped as ``(n_features, n_samples)``. This is the opposite of the
-  ``scikits.learn`` convention, so ``fetch_mldata`` transposes the matrix
+  ``scikit-learn`` convention, so ``fetch_mldata`` transposes the matrix
   by default. The ``transpose_data`` keyword controls this behavior::

     >>> iris = fetch_mldata('iris', data_home=custom_data_home)
diff --git a/doc/datasets/mldata_fixture.py b/doc/datasets/mldata_fixture.py
index 192daa4af5598..2267288c38fb5 100644
--- a/doc/datasets/mldata_fixture.py
+++ b/doc/datasets/mldata_fixture.py
@@ -5,8 +5,8 @@
 from os import makedirs
 from os.path import join
-from scikits.learn import datasets
-from scikits.learn.utils.testing import mock_urllib2
+from sklearn import datasets
+from sklearn.utils.testing import mock_urllib2
 import tempfile
 import scipy as sp
 import shutil
diff --git a/doc/datasets/twenty_newsgroups.rst b/doc/datasets/twenty_newsgroups.rst
index 13fd2f1955b80..c4fd379e2111a 100644
--- a/doc/datasets/twenty_newsgroups.rst
+++ b/doc/datasets/twenty_newsgroups.rst
@@ -13,21 +13,21 @@ provides a version where the data is already vectorized.
 This is not the case for this loader. Instead, it returns the list of
 the raw text files that can be fed to text feature extractors such as
-:class:`scikits.learn.feature_extraction.text.Vectorizer` with custom
+:class:`sklearn.feature_extraction.text.Vectorizer` with custom
 parameters so as to extract feature vectors.

 Usage
 -----

-The ``scikits.learn.datasets.fetch_20newsgroups`` function is a data
+The ``sklearn.datasets.fetch_20newsgroups`` function is a data
 fetching / caching function that downloads the data archive from
 the original `20 newsgroups website`_, extracts the archive contents in
 the ``~/scikit_learn_data/20news_home`` folder and calls the
-``scikits.learn.datasets.load_file`` on either the training or
+``sklearn.datasets.load_file`` on either the training or
 testing set folder, or both of them::

-  >>> from scikits.learn.datasets import fetch_20newsgroups
+  >>> from sklearn.datasets import fetch_20newsgroups
   >>> newsgroups_train = fetch_20newsgroups(subset='train')

   >>> from pprint import pprint
@@ -81,11 +81,11 @@ list of the categories to load to the ``fetch_20newsgroups`` function::
 In order to feed predictive or clustering models with the text data, one
 first needs to turn the text into vectors of numerical values suitable for
 statistical analysis. This can be achieved with the utilities of
-``scikits.learn.feature_extraction.text`` as demonstrated in the following
+``sklearn.feature_extraction.text`` as demonstrated in the following
 example that extracts `TF-IDF`_ vectors of unigram tokens::

-  >>> from scikits.learn.feature_extraction.text import Vectorizer
+  >>> from sklearn.feature_extraction.text import Vectorizer
   >>> documents = [open(f).read() for f in newsgroups_train.filenames]
   >>> vectorizer = Vectorizer()
   >>> vectors = vectorizer.fit_transform(documents)
diff --git a/doc/developers/performance.rst b/doc/developers/performance.rst
index 250e0e82df0ac..d962348e6d9cb 100644
--- a/doc/developers/performance.rst
+++ b/doc/developers/performance.rst
@@ -26,7 +26,7 @@ code for the scikit-learn project.
 Python, Cython or C/C++?
 ========================

-.. currentmodule:: scikits.learn
+.. currentmodule:: sklearn

 In general, the scikit-learn project emphasizes the **readability** of
 the source code to make it easy for the project users to dive into the
@@ -89,9 +89,9 @@ Suppose we want to profile the Non Negative Matrix Factorization module
 of the scikit. Let us set up a new IPython session and load the digits
 dataset as in the :ref:`example_decomposition_plot_nmf.py` example::

-  In [1]: from scikits.learn.decomposition import NMF
+  In [1]: from sklearn.decomposition import NMF

-  In [2]: from scikits.learn.datasets import load_digits
+  In [2]: from sklearn.datasets import load_digits

   In [3]: X = load_digits().data
@@ -188,16 +188,16 @@ Towards the end of the file, define the ``%lprun`` magic::
 Now restart IPython and let us use this new toy::

-  In [1]: from scikits.learn.datasets import load_digits
+  In [1]: from sklearn.datasets import load_digits

-  In [2]: from scikits.learn.decomposition.nmf import _nls_subproblem, NMF
+  In [2]: from sklearn.decomposition.nmf import _nls_subproblem, NMF

   In [3]: X = load_digits().data

   In [4]: %lprun -f _nls_subproblem NMF(n_components=16, tol=1e-2).fit(X)
   Timer unit: 1e-06 s

-  File: scikits/learn/decomposition/nmf.py
+  File: sklearn/decomposition/nmf.py
   Function: _nls_subproblem at line 137
   Total time: 1.73153 s
diff --git a/examples/cluster/README.txt b/examples/cluster/README.txt
index 767b917d2bb63..1b38bab9cbb28 100644
--- a/examples/cluster/README.txt
+++ b/examples/cluster/README.txt
@@ -3,5 +3,5 @@
 Clustering
 ----------

-Examples concerning the `scikits.learn.cluster` package.
+Examples concerning the `sklearn.cluster` package.
diff --git a/examples/covariance/README.txt b/examples/covariance/README.txt
index 5160f8bb618cc..0767f1031d8b0 100644
--- a/examples/covariance/README.txt
+++ b/examples/covariance/README.txt
@@ -1,4 +1,4 @@
 Covariance estimation
 ---------------------

-Examples concerning the `scikits.learn.covariance` package.
+Examples concerning the `sklearn.covariance` package.
diff --git a/examples/decomposition/README.txt b/examples/decomposition/README.txt
index c2bd41efe01f2..b5f710c810f77 100644
--- a/examples/decomposition/README.txt
+++ b/examples/decomposition/README.txt
@@ -3,5 +3,5 @@
 Decomposition
 -------------

-Examples concerning the `scikits.learn.decomposition` package.
+Examples concerning the `sklearn.decomposition` package.
diff --git a/examples/gaussian_process/README.txt b/examples/gaussian_process/README.txt
index c749e7a7e9dc9..216660e8acfe3 100644
--- a/examples/gaussian_process/README.txt
+++ b/examples/gaussian_process/README.txt
@@ -3,5 +3,5 @@
 Gaussian Process for Machine Learning
 -------------------------------------

-Examples concerning the `scikits.learn.gaussian_process` package.
+Examples concerning the `sklearn.gaussian_process` package.
diff --git a/examples/gaussian_process/gp_diabetes_dataset.py b/examples/gaussian_process/gp_diabetes_dataset.py
index f3d1e46cdf041..fbf0b791b6c03 100644
--- a/examples/gaussian_process/gp_diabetes_dataset.py
+++ b/examples/gaussian_process/gp_diabetes_dataset.py
@@ -27,7 +27,7 @@
 from sklearn.gaussian_process import GaussianProcess
 from sklearn.cross_val import cross_val_score, KFold

-# Load the dataset from scikits' data sets
+# Load the dataset from scikit-learn's datasets
 diabetes = datasets.load_diabetes()
 X, y = diabetes.data, diabetes.target
diff --git a/examples/linear_model/README.txt b/examples/linear_model/README.txt
index 77439a5aa15ea..d70d3bed9d2bf 100644
--- a/examples/linear_model/README.txt
+++ b/examples/linear_model/README.txt
@@ -2,4 +2,4 @@
 Generalized Linear Models
 -------------------------

-Examples concerning the `scikits.learn.linear_model` package.
+Examples concerning the `sklearn.linear_model` package.
diff --git a/examples/manifold/README.txt b/examples/manifold/README.txt
index 42a5dfcca72fe..13a9ed3bbe9ca 100644
--- a/examples/manifold/README.txt
+++ b/examples/manifold/README.txt
@@ -3,5 +3,5 @@
 Manifold learning
 -----------------------

-Examples concerning the `scikits.learn.manifold` package.
+Examples concerning the `sklearn.manifold` package.
diff --git a/examples/manifold/plot_lle_digits.py.prof b/examples/manifold/plot_lle_digits.py.prof
deleted file mode 100644
index 2d30366716f99..0000000000000
Binary files a/examples/manifold/plot_lle_digits.py.prof and /dev/null differ
diff --git a/examples/mixture/README.txt b/examples/mixture/README.txt
index bbf508e59d26d..1cc9671e40150 100644
--- a/examples/mixture/README.txt
+++ b/examples/mixture/README.txt
@@ -2,4 +2,4 @@
 Gaussian Mixture Models
 -----------------------

-Examples concerning the `scikits.learn.mixture` package.
+Examples concerning the `sklearn.mixture` package.
diff --git a/examples/svm/README.txt b/examples/svm/README.txt
index 9c83e641b5b68..f9f3b57afc456 100644
--- a/examples/svm/README.txt
+++ b/examples/svm/README.txt
@@ -3,5 +3,5 @@
 Support Vector Machines
 -----------------------

-Examples concerning the `scikits.learn.svm` package.
+Examples concerning the `sklearn.svm` package.
diff --git a/setup.py b/setup.py
index af79845f6e57b..1197d029ac5a3 100644
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,7 @@
 import os
 import shutil

-DISTNAME = 'sklearn'
+DISTNAME = 'scikit-learn'
 DESCRIPTION = 'A set of python modules for machine learning and data mining'
 LONG_DESCRIPTION = open('README.rst').read()
 MAINTAINER = 'Fabian Pedregosa'
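The common thread of the documentation and example hunks above is a pure
namespace rename: every ``scikits.learn`` import becomes a ``sklearn``
import, with the module layout otherwise unchanged. A minimal doctest-style
sketch of the new spelling, reusing the digits loader that the profiling
session above already imports (the shape shown is the standard digits
dataset dimensions)::

  >>> # was: from scikits.learn.datasets import load_digits
  >>> from sklearn.datasets import load_digits
  >>> X = load_digits().data
  >>> X.shape
  (1797, 64)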
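The ``transpose_data`` behavior referenced in the ``mldata.rst`` hunk can be
made concrete with a short sketch. This assumes the mldata.org ``'iris'``
dataset and the same writable ``custom_data_home`` used by the surrounding
doctests::

  >>> from sklearn.datasets import fetch_mldata
  >>> iris = fetch_mldata('iris', data_home=custom_data_home)
  >>> iris.data.shape        # transposed to (n_samples, n_features) by default
  (150, 4)
  >>> iris = fetch_mldata('iris', transpose_data=False,
  ...                     data_home=custom_data_home)
  >>> iris.data.shape        # raw mldata.org orientation
  (4, 150)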
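The ``setup.py`` hunk goes in the opposite direction from all the others:
the distribution name becomes ``scikit-learn`` while the importable package
stays ``sklearn``, so the install name and the import name deliberately
diverge. A sketch of the resulting split, assuming the renamed distribution
is installed (``pkg_resources`` is just one setuptools-era way to read back
the installed distribution name)::

  >>> import sklearn          # the import name is unchanged
  >>> import pkg_resources
  >>> pkg_resources.get_distribution('scikit-learn').project_name  # install name
  'scikit-learn'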