diff --git a/INSTALL.rst.txt b/INSTALL.rst.txt
index 001f25bec2a6..b48b4558e639 100644
--- a/INSTALL.rst.txt
+++ b/INSTALL.rst.txt
@@ -196,7 +196,7 @@ TROUBLESHOOTING
 ===============
 
 If you experience problems when building/installing/testing SciPy, you
-can ask help from scipy-user@scipy.org or scipy-dev@scipy.org mailing
+can ask for help from scipy-user@python.org or scipy-dev@python.org mailing
 lists. Please include the following information in your message:
 
 NOTE: You can generate some of the following information (items 1-5,7)
diff --git a/LICENSE.txt b/LICENSE.txt
index 3d3399465fdf..a2b41f5cbf4b 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,7 +1,7 @@
 Copyright (c) 2001, 2002 Enthought, Inc.
 All rights reserved.
 
-Copyright (c) 2003-2016 SciPy Developers.
+Copyright (c) 2003-2017 SciPy Developers.
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
diff --git a/README.rst b/README.rst
index 6b6c6e192631..e89cf408a1bc 100644
--- a/README.rst
+++ b/README.rst
@@ -54,9 +54,10 @@ The user's site is:
 Mailing Lists
 -------------
 
-Please see the developer's list here:
+The addresses and archives for the developer and user mailing lists can be
+found here:
 
-    http://projects.scipy.org/mailman/listinfo/scipy-dev
+    http://scipy.org/scipylib/mailing-lists.html
 
 
 Latest source code
diff --git a/THANKS.txt b/THANKS.txt
index 7c5ae830d743..dbd8a9e721d8 100644
--- a/THANKS.txt
+++ b/THANKS.txt
@@ -7,7 +7,7 @@ integral to wrapping the many Fortran libraries used in SciPy.
 
 Since then many people have contributed to SciPy, both in code development,
 suggestions, and financial support.  Below is a partial list.  If you've
-been left off, please email the "SciPy Developers List" <scipy-dev@scipy.org>.
+been left off, please email the "SciPy Developers List" <scipy-dev@python.org>.
 
 Please add names as needed so that we can keep up with all the contributors.
 
diff --git a/bento.info b/bento.info
index 9edf81bbd6be..f57b621572ba 100644
--- a/bento.info
+++ b/bento.info
@@ -15,7 +15,7 @@ Description:
     scientists and engineers. If you need to manipulate numbers on a computer
     and display or publish the results, give SciPy a try!
 Maintainer: SciPy Developers
-MaintainerEmail: scipy-dev@scipy.org
+MaintainerEmail: scipy-dev@python.org
 License: BSD
 Platforms: Windows,Linux,Solaris,Mac OS-X,Unix
 Classifiers:
diff --git a/scipy/cluster/doc/README.txt b/scipy/cluster/doc/README.txt
deleted file mode 100644
index 5c7930f3f1fb..000000000000
--- a/scipy/cluster/doc/README.txt
+++ /dev/null
@@ -1,215 +0,0 @@
-Hey Costas,
-
-Glad to see someone using the kmeans stuff.
-
-> --I confess to not understanding the docstring
-
-Sorry for the confusion.  I'll try to explain thing more clearly.  If it works, will use this as the doc. :)
-
-> However, I am not quite following the kmeans() functionality (I am new to 
-> this clustering business, so this maybe a stupid newbie question): my docs 
-> tell me that kmeans should partition a dataset into k clusters.  So, I 
-> expect vq.kmeans(dataset, 2) to return to me dataset split up into two 
-> "equivalent" datasets.
-
-Splitting the data into two data sets is actually a two or three step process. 
-Here's a complete example.
-
-"Observations" are just another name for a data point.  The obs matrix is a 2D 
-array of data points.  For example if our data set includes height, weight, and 
-40-yard speed of football players, you might have the following (fictitous) 
-data:
-
-obs:
-                            lb        inches     seconds       
-                           ----------------------------
-    Refrigerator Perry    | 400         79         5.4
-    Jerry Rice            | 180         76         4.5
-    Zachary Jones         |  28         25        30.0
-    Too Tall Jones        | 270         81         5.0
-    Charlie Joiner        | 185         78         4.6
-
-The data above is the "obs" matrix.  Each row in the 2D array is a data point, 
-often called an "observation" or "sample".  Each column is sometimes called the 
-"features" of a player. Imagine, we want to split this data set into two 
-"clusters", perhaps dividing the data into linemen and receivers.  One way to 
-find two "codes", one to represent each of these groups.  (I think the term 
-"code" comes from communication theory, but I'm not sure.  Perhaps "classes" is 
-more descriptive.)  I watched enough games (observed enough players) to make an 
-educated guess as to what these codes might be:
-
-possible code book:
-
-                code        lb         inches     seconds
-                            -----------------------------
-    receiver     0         | 180          75         4.8
-    lineman      1         | 260          76         5.5
-
-
-So code 0 stands for a "typical" receiver and code 1 represents your typical 
-lineman.  "Vector quantization" is an algorithm that calculates the distance 
-between a data point and every code in the "code book" to find the closest one 
-(i.e. which class is the best match). In scipy.cluster, the vq module houses 
-the vector quantization tools. vq.vq(obs,code_book) returns 2 arrays -- (1) the 
-index (row in the code book) of the code nearest to each data point, and (2) 
-the distance that each data point is from that code. code_book is always a 2D
-array.  If obs is a 2D array, each row is a separate data point.
-
-    # note I rounded some numbers to save space
-    >>> obs = array(((400, 79, 5.4),
-    ...              (180, 76, 4.5),
-    ...              (28,  25, 30.),
-    ...              (270, 81, 5.0),
-    ...              (185, 78, 4.6)))
-    >>> code_book = array(((180, 75, 4.8),
-    ...                    (260, 76, 5.5)))
-    >>> code_id, distortion = vq.vq(obs,code_book)
-    >>> code_id
-    array([1, 0, 0, 1, 0])
-    >>> distortion
-    array([ 140.03, 1.045, 161.985, 11.192, 5.834]) 
-
-code_id now tells what position each of the football players is most likely to 
-play. Distortion is the distance, using sqrt( a^2 + b^2 + c^2), that each 
-player is from the code (typical player) in their category.  Low numbers mean 
-the match is good.  For example, vq tells us that Jerry Rice is receiver, and 
-the low distortion means that it is pretty dang sure about that. 
-
-                             code_id          distortion
-                         ------------------------------ 
-    Refrigerator Perry     1 --> lineman         140.03      
-    Jerry Rice             0 --> receiver          1.04
-    Zachary Jones          0 --> receiver        161.99
-    Too Tall Jones         1 --> lineman          11.19
-    Charlie Joiner         0 --> receiver          5.83
-
-Most of the classifications make sense, but the distortions have some problems. 
-Notably that my 1 year old son is about as likely to be a receiver as R. Perry 
-is to be a lineman.  Looking at the data, it is obvious that R. Perry is a 
-lineman. It isn't obvious where Zach falls because he's small (like a receiver) 
-and slow (like a lineman).  So we should be quite a bit more sure about the 
-fact that R. Perry is a lineman.  What's wrong?  Well, the distortion value's 
-primary contribution comes from the large weight differences between these 
-players and the typical players.  Even though Zach's speed is a long way from 
-receiver's speed this doesn't contribute to the distortion much.  That's bad 
-because the speed difference carries a lot of useful information.  For more 
-accurate distortion values, we would like each feature of a player 
-(weight, height, and speed) to be weighted equally.  One way of doing this is 
-to "whiten" the features of both the data and the code book by dividing each 
-feature by the standard deviation of that feature set.  
-
-The standard deviation of weights and speeds across all the players are:
-
-    #weight
-    >>> stats.stdev((400,180,28,270,185))
-    136.30407183939883
-    #speed
-    >>> stats.stdev((5.4,4.5,30.0,5.0,4.6))
-    11.241885962773329
-
-So the whitened weight and speed distortions for R. Perry from a typical 
-lineman is:
-
-    # whitened weight
-    >>> abs(400-260)/136.3
-    1.0271460014673512   
-    # whitened speed
-    >>> abs(5.4-5.5)/11.24
-    0.0088967971530248789
-
-So the whitened weight and speed distortions for Z. Jones from a typical 
-receiver is:
-
-    # whitened weight
-    >>> abs(28-180)/136.3
-    1.1151870873074101
-    # whitened speed
-    >>> (30.0-4.8)/11.24
-    2.2419928825622777
-
-It is apparent from these values that Zach's speed difference is gonna have
-a large affect on the distortion now.  
-
-The whiten() function handles the chore of whitening a data set.
-
-    >>> wh_obs = whiten(obs)
-
-Usually, the code book is actually calculated from a whitened set of data (via 
-kmeans or some other method), and thus are automatically white.  In this case,
-I specified the code_book in non-whitened units, so they'll need to be normalized
-by the same standard deviation of the data set.
-
-    # not normally needed
-    >>> wh_code_book = code_book / std(obs,axis=0)
-
-Now, rerunning vq gives:
-    
-    >>> code_id, distortion = vq.vq(wh_obs,wh_code_book)
-    >>> code_id
-    array([1, 0, 0, 1, 0])
-    >>> distortion
-    array([ 1.034, 0.049,  3.257 ,  0.225,  0.131])
-    
-                             code_id         whitened distortion
-                         --------------------------------------- 
-    Refrigerator Perry     1 --> lineman             1.034      
-    Jerry Rice             0 --> receiver            0.049
-    Zachary Jones          0 --> receiver            3.257
-    Too Tall Jones         1 --> lineman             0.225
-    Charlie Joiner         0 --> receiver            0.131
-
-Now Zach's distortion is much higher than everyone elses which makes sense.
-
-In the example above, I made an educated guess based on my knowledge of 
-football of what the size and speed of a typical player in a given position 
-might be.  This is information I had to supply to the clustering algorithm
-before it could determine how to classify each player.  Suppose you didn't
-know anything about football, and watched a football gamebut were given a list of players with there
-position, weight, height, and speed.  You could use this information to 
-educate yourself about the traits of a typical receiver, linebacker, lineman,
-etc.  The kmeans algorithm also does this.  It takes a set of data, and
-the number of positions you want to categorize 
-
------ Original Message ----- 
-From: "Costas Malamas" <costasm@hotmail.com>
-To: <scipy-user@scipy.net>
-Sent: Monday, January 07, 2002 5:50 PM
-Subject: [SciPy-user] Kmeans help and C source
-
-
-> Hi all,
-> 
-> I need to use a modified K-means algorithm for a project and I was delighted 
-> to discover that SciPy includes a python wrapper for a kmeans() function.
-> 
-> However, I am not quite following the kmeans() functionality (I am new to 
-> this clustering business, so this maybe a stupid newbie question): my docs 
-> tell me that kmeans should partition a dataset into k clusters.  So, I 
-> expect vq.kmeans(dataset, 2) to return to me dataset split up into two 
-> "equivalent" datasets.  However, anyway I feed my data into vq.kmeans() this 
-> doesn't happen (e.g. I feed it a 5x4 dataset and I get back two 5x1 
-> vectors).  
-> My guess is that either this vq.kmeans() does something different 
-> --I confess to not understanding the docstring as the observation/codebook 
-> terminology has no parallel to the docs I've read-- or that I am not doing 
-> something right.  Any pointers? Even some documentation on the algorithm 
-> would be great help.
-> 
-> Secondly, as I mentioned above, I need a modified kmeans.  However, I see no 
-> C/Fortran code in the src tarball or CVS that seems related to kmeans.  Is 
-> the base code available?  If so, is it hackable by a SWIG newbie? (I am 
-> aware of SWIG, but I have never used it for anything serious).
-> 
-> Any and all info will be greatly appreciated :-) --and thanks for SciPy!
-> 
-> 
-> Costas Malamas
-> 
-> _________________________________________________________________
-> Get your FREE download of MSN Explorer at http://explorer.msn.com/intl.asp.
-> 
-> _______________________________________________
-> SciPy-user mailing list
-> SciPy-user@scipy.net
-> http://www.scipy.net/mailman/listinfo/scipy-user
-> 
diff --git a/scipy/cluster/doc/ex1.py b/scipy/cluster/doc/ex1.py
deleted file mode 100755
index 37272a2e7c9f..000000000000
--- a/scipy/cluster/doc/ex1.py
+++ /dev/null
@@ -1,40 +0,0 @@
-from __future__ import division, print_function, absolute_import
-
-import numpy as np
-from scipy.cluster import vq
-
-
-def cluster_data(data,cluster_cnt,iter=20,thresh=1e-5):
-    """ Group data into a number of common clusters
-
-        data -- 2D array of data points.  Each point is a row in the array.
-        cluster_cnt -- The number of clusters to use
-        iter -- number of iterations to use for kmeans algorithm
-        thresh -- distortion threshold for kmeans algorithm
-
-        return -- list of 2D arrays.  Each array contains the data points
-                  that belong to a specific cluster.
-
-        Uses kmeans algorithm to find the clusters.
-    """
-    wh_data = vq.whiten(data)
-    code_book,dist = vq.kmeans(wh_data,cluster_cnt,iter,thresh)
-    code_ids, distortion = vq.vq(wh_data,code_book)
-    clusters = []
-    for i in range(len(code_book)):
-        cluster = np.compress(code_ids == i,data,0)
-        clusters.append(cluster)
-    return clusters
-
-if __name__ == "__main__":
-
-    data = np.array(((400, 79, 5.4),
-                     (180, 76, 4.5),
-                     (28, 25, 30.),
-                     (270, 81, 5.0),
-                     (185, 78, 4.6)))
-
-    clusters = cluster_data(data,2)
-    for i in range(len(clusters)):
-        print('cluster %d:' % i)
-        print(clusters[i])
diff --git a/scipy/cluster/setup.py b/scipy/cluster/setup.py
index 4b444eb7ad65..793ed6a49c44 100755
--- a/scipy/cluster/setup.py
+++ b/scipy/cluster/setup.py
@@ -29,13 +29,7 @@ def configuration(parent_package='', top_path=None):
 
     return config
 
+
 if __name__ == '__main__':
     from numpy.distutils.core import setup
-    setup(maintainer="SciPy Developers",
-          author="Eric Jones",
-          maintainer_email="scipy-dev@scipy.org",
-          description="Clustering Algorithms (Information Theory)",
-          url="https://www.scipy.org",
-          license="SciPy License (BSD Style)",
-          **configuration(top_path='').todict()
-          )
+    setup(**configuration(top_path='').todict())
diff --git a/scipy/fftpack/NOTES.txt b/scipy/fftpack/NOTES.txt
deleted file mode 100644
index 752c5f2f8b58..000000000000
--- a/scipy/fftpack/NOTES.txt
+++ /dev/null
@@ -1,93 +0,0 @@
-.. -*- rest -*-
-
-================================================
-  fftpack - Discrete Fourier Transform package
-================================================
-
-:Author: Pearu Peterson <pearu@cens.ioc.ee>
-:Last changed: $Date$
-:Revision: $Revision$
-:Discussions to: scipy-dev@scipy.org
-
-Installation
-============
-
-To build fftpack, you'll need F2PY version 2.23.190-1367 or higher.
-
-Run
-
-::
-
-  python setup.py install
-
-Testing
-=======
-
-Run
-
-::
-
-  python -c "import scipy.fftpack; scipy.fftpack.test()"
-
-or from python
-
->>> import scipy.fftpack
->>> scipy.fftpack.test(10)
-
-
-Differences between fftpack and FFT from Numeric
-================================================
-
-* Functions rfft and irfft accept and return only real sequences. So,
-  the corresponding functions real_fft, inverse_real_fft from FFT are
-  not equivalent with rfft and irfft. The difference is in the storage
-  of data, see the definitions of corresponding functions for details.
-
-* PROPOSAL: When calling ifft with forced truncation or zero-padding
-  then I would like to propose that the corresponding action is
-  applied to the middle of data. For example, ifft([1,2,3,4,5,6],n=8)
-  is equivalent to ifft([1,2,3,4,0,0,5,6]), that is, the Fourier terms
-  with higher frequencies and zero coefficients are introduced. In the
-  Numeric.FFT case, the example above would be equivalent to
-  ifft([1,2,3,4,5,6,0,0],n=8), which would mean that Fourier
-  coefficients [5,6] become the coefficients of higher frequency terms
-  and the original terms are zerod.
-
-  Note that this proposal is **not implemented** because it needs to
-  be discussed. For instance, Matlab uses the same convention as FFT
-  and this change would be confusing for Matlab users.  On the other
-  hand, FFT or Matlab's conventions change the spectrum of the
-  original signal and I don't see any sense in this behaviour (if you
-  don't agree then please provide an example). Namely, one of the
-  applications of the argument n would be to compose a new signal with
-  a more dense or sparse grid than the original one by using
-
-  ::
-
-    new_signal = ifft(fft(signal),n)
-
-  Note that the new_signal would have the same Fourier spectrum as
-  original signal. With Matlab/FFT convention this is not true.  Any
-  thoughts?
-
-
-Nyquist mode
-============
-
-Pseudo-differential and differential operators of odd order assume
-that the Nyquist mode is zero that reduces the amplification of
-numerical noise from the aliasing effect.
-
-To do
-=====
-
-basic.py
-  - Optimize ``fftn()`` for real input.
-  - Implement ``rfftn()`` and ``irfftn()``.
-  - Implement discrete cosine/sine transforms
-pseudo_diffs.py
-  - Optimize functions for complex input.
-src/convolve.c
-  - See if convolve could use convolution function from DJBFFT.
-
-If you have any comments, please send them to scipy-dev@scipy.org.
diff --git a/scipy/fftpack/__init__.py b/scipy/fftpack/__init__.py
index 4c1d86201de7..bd2c5fe48949 100644
--- a/scipy/fftpack/__init__.py
+++ b/scipy/fftpack/__init__.py
@@ -92,8 +92,6 @@
            'next_fast_len',
            ]
 
-from .fftpack_version import fftpack_version as __version__
-
 from .basic import *
 from .pseudo_diffs import *
 from .helper import *
diff --git a/scipy/fftpack/fftpack_version.py b/scipy/fftpack/fftpack_version.py
deleted file mode 100644
index 0e5499a11bcc..000000000000
--- a/scipy/fftpack/fftpack_version.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from __future__ import division, print_function, absolute_import
-
-major = 0
-minor = 4
-micro = 3
-
-
-fftpack_version = '%(major)d.%(minor)d.%(micro)d' % (locals())
diff --git a/scipy/fftpack/setup.py b/scipy/fftpack/setup.py
index 57c027f41726..506d7a2e0927 100755
--- a/scipy/fftpack/setup.py
+++ b/scipy/fftpack/setup.py
@@ -35,13 +35,7 @@ def configuration(parent_package='',top_path=None):
     )
     return config
 
+
 if __name__ == '__main__':
     from numpy.distutils.core import setup
-    from fftpack_version import fftpack_version
-    setup(version=fftpack_version,
-          description='fftpack - Discrete Fourier Transform package',
-          author='Pearu Peterson',
-          author_email='pearu@cens.ioc.ee',
-          maintainer_email='scipy-dev@scipy.org',
-          license='SciPy License (BSD Style)',
-          **configuration(top_path='').todict())
+    setup(**configuration(top_path='').todict())
diff --git a/scipy/sparse/linalg/eigen/lobpcg/tests/test_lobpcg.py b/scipy/sparse/linalg/eigen/lobpcg/tests/test_lobpcg.py
index 396276a682ff..64c5b4a2e402 100644
--- a/scipy/sparse/linalg/eigen/lobpcg/tests/test_lobpcg.py
+++ b/scipy/sparse/linalg/eigen/lobpcg/tests/test_lobpcg.py
@@ -79,7 +79,7 @@ def test_trivial():
 
 
 def test_regression():
-    # https://mail.scipy.org/pipermail/scipy-user/2010-October/026944.html
+    # https://mail.python.org/pipermail/scipy-user/2010-October/026944.html
     n = 10
     X = np.ones((n, 1))
     A = np.identity(n)
diff --git a/scipy/spatial/setup.py b/scipy/spatial/setup.py
index ef1b922c66f3..9374926385bc 100755
--- a/scipy/spatial/setup.py
+++ b/scipy/spatial/setup.py
@@ -43,8 +43,8 @@ def get_qhull_misc_config(ext, build_dir):
                          sources=['qhull.c'] + qhull_src + [get_qhull_misc_config],
                          **cfg)
 
-    # cKDTree    
-    ckdtree_src = ['query.cxx', 
+    # cKDTree
+    ckdtree_src = ['query.cxx',
                    'build.cxx',
                    'globals.cxx',
                    'cpp_exc.cxx',
@@ -53,20 +53,20 @@ def get_qhull_misc_config(ext, build_dir):
                    'query_ball_point.cxx',
                    'query_ball_tree.cxx',
                    'sparse_distances.cxx']
-                   
+
     ckdtree_src = [join('ckdtree', 'src', x) for x in ckdtree_src]
-    
-    ckdtree_headers = ['ckdtree_decl.h', 
-                       'cpp_exc.h', 
+
+    ckdtree_headers = ['ckdtree_decl.h',
+                       'cpp_exc.h',
                        'ckdtree_methods.h',
                        'cpp_utils.h',
                        'rectangle.h',
                        'distance.h',
                        'distance_box.h',
                        'ordered_pair.h']
-                       
+
     ckdtree_headers = [join('ckdtree', 'src', x) for x in ckdtree_headers]
-        
+
     ckdtree_dep = ['ckdtree.cxx'] + ckdtree_headers + ckdtree_src
     config.add_extension('ckdtree',
                          sources=['ckdtree.cxx'] + ckdtree_src,
@@ -87,13 +87,7 @@ def get_qhull_misc_config(ext, build_dir):
 
     return config
 
+
 if __name__ == '__main__':
     from numpy.distutils.core import setup
-    setup(maintainer="SciPy Developers",
-          author="Anne Archibald",
-          maintainer_email="scipy-dev@scipy.org",
-          description="Spatial algorithms and data structures",
-          url="https://www.scipy.org",
-          license="SciPy License (BSD Style)",
-          **configuration(top_path='').todict()
-          )
+    setup(**configuration(top_path='').todict())
diff --git a/setup.py b/setup.py
index f6bbd931b32f..1aad207cfeb7 100755
--- a/setup.py
+++ b/setup.py
@@ -367,7 +367,7 @@ def setup_package():
     metadata = dict(
         name='scipy',
         maintainer="SciPy Developers",
-        maintainer_email="scipy-dev@scipy.org",
+        maintainer_email="scipy-dev@python.org",
         description=DOCLINES[0],
         long_description="\n".join(DOCLINES[2:]),
         url="https://www.scipy.org",