Skip to content

Commit

Permalink
Merge pull request scipy#7905 from apbard/REV-restore-wminkowski
Browse files Browse the repository at this point in the history
REV: restore wminkowski
  • Loading branch information
rgommers authored Sep 22, 2017
2 parents 6bb5af0 + 1c2907e commit 8caf958
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 18 deletions.
12 changes: 12 additions & 0 deletions doc/release/1.0.0-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,18 @@ The ``fillvalue`` of `scipy.signal.convolve2d` will be cast directly to the
dtypes of the input arrays in the future and checked that it is a scalar or
an array with a single element.

``scipy.spatial.distance.matching`` is deprecated. It is an alias of
`scipy.spatial.distance.hamming`, which should be used instead.

The implementation of `scipy.spatial.distance.wminkowski` was based on a wrong
interpretation of the metric definition. In scipy 1.0 it has only been
deprecated in the documentation to keep backwards compatibility, but it is
recommended to use the new version of `scipy.spatial.distance.minkowski`, which
implements the correct behaviour.

Positional arguments of `scipy.spatial.distance.pdist` and
`scipy.spatial.distance.cdist` should be replaced with their keyword version.


Backwards incompatible changes
==============================
Expand Down
47 changes: 33 additions & 14 deletions scipy/spatial/distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ def minkowski(u, v, p=2, w=None):
{||u-v||}_p = (\\sum{|u_i - v_i|^p})^{1/p}.
\\left(\\sum{(|w_i (u_i - v_i)|^p)}\\right)^{1/p}.
\\left(\\sum{w_i(|(u_i - v_i)|^p)}\\right)^{1/p}.
Parameters
----------
Expand Down Expand Up @@ -486,18 +486,41 @@ def minkowski(u, v, p=2, w=None):
# deprecated `wminkowski`. Not done at once because it would be annoying for
# downstream libraries that used `wminkowski` and support multiple scipy
# versions.
def wminkowski(*args, **kwds):
return minkowski(*args, **kwds)
def wminkowski(u, v, p, w):
"""
Computes the weighted Minkowski distance between two 1-D arrays.
The weighted Minkowski distance between `u` and `v`, defined as
.. math::
\\left(\\sum{(|w_i (u_i - v_i)|^p)}\\right)^{1/p}.
Parameters
----------
u : (N,) array_like
Input array.
v : (N,) array_like
Input array.
p : int
The order of the norm of the difference :math:`{||u-v||}_p`.
w : (N,) array_like
The weight vector.
if minkowski.__doc__ is not None:
doc = minkowski.__doc__.replace("Minkowski", "Weighted Minkowski")
doc += """Notes
Returns
-------
wminkowski : double
The weighted Minkowski distance between vectors `u` and `v`.
Notes
-----
`wminkowski` is DEPRECATED. It is simply an alias of `minkowski` in
scipy >= 1.0.
`wminkowski` is DEPRECATED. It implements a definition in which the weights
are raised to the power `p` along with the differences. It is recommended to
use the weighted version of `minkowski` instead. This function will be removed
in a future version of scipy.
"""
wminkowski.__doc__ = doc
w = _validate_vector(w)
return minkowski(u, v, p=p, w=w**p)


def euclidean(u, v, w=None):
"""
Expand All @@ -509,7 +532,7 @@ def euclidean(u, v, w=None):
{||u-v||}_2
\\left(\\sum{(|w_i (u_i - v_i)|^2)}\\right)^{1/2}
\\left(\\sum{(w_i |(u_i - v_i)|^2)}\\right)^{1/2}
Parameters
----------
Expand Down Expand Up @@ -1675,8 +1698,6 @@ def pdist(X, metric='euclidean', *args, **kwargs):
if(mstr in _METRICS['seuclidean'].aka or
mstr in _METRICS['mahalanobis'].aka):
raise ValueError("metric %s incompatible with weights" % mstr)
if mstr in _METRICS['wminkowski'].aka:
mstr = "minkowski"
# need to use python version for weighting
kwargs['out'] = out
mstr = "test_%s" % mstr
Expand Down Expand Up @@ -2404,8 +2425,6 @@ def cdist(XA, XB, metric='euclidean', *args, **kwargs):
if(mstr in _METRICS['seuclidean'].aka or
mstr in _METRICS['mahalanobis'].aka):
raise ValueError("metric %s incompatible with weights" % mstr)
if mstr in _METRICS['wminkowski'].aka:
mstr = "minkowski"
# need to use python version for weighting
kwargs['out'] = out
mstr = "test_%s" % mstr
Expand Down
31 changes: 27 additions & 4 deletions scipy/spatial/tests/test_distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@
hamming, jaccard, kulsinski, mahalanobis,
matching, minkowski, rogerstanimoto,
russellrao, seuclidean, sokalmichener,
sokalsneath, sqeuclidean, yule, wminkowski)
sokalsneath, sqeuclidean, yule)
from scipy.spatial.distance import wminkowski as old_wminkowski

_filenames = [
"cdist-X1.txt",
Expand Down Expand Up @@ -221,7 +222,7 @@ def _assert_within_tol(a, b, atol=0, rtol=0, verbose_=False):

def _rand_split(arrays, weights, axis, split_per, seed=None):
# inverse operation for stats.collapse_weights
weights = np.array(weights) # modified inplace; need a copy
weights = np.array(weights, dtype=np.float64) # modified inplace; need a copy
seeded_rand = np.random.RandomState(seed)

def mytake(a, ix, axis):
Expand All @@ -236,7 +237,7 @@ def mytake(a, ix, axis):
prev_w = weights[split_ix]
q = seeded_rand.rand()
weights[split_ix] = q * prev_w
weights = np.append(weights, (1-q) * prev_w)
weights = np.append(weights, (1. - q) * prev_w)
arrays = [np.append(a, mytake(a, split_ix, axis=axis),
axis=axis) for a in arrays]
return arrays, weights
Expand Down Expand Up @@ -362,7 +363,7 @@ def wrapped(*args, **kwargs):
wdice = _weight_checked(dice)
wcosine = _weight_checked(cosine)
wcorrelation = _weight_checked(correlation)
wminkowski = _weight_checked(wminkowski, const_test=False)
wminkowski = _weight_checked(minkowski, const_test=False)
wjaccard = _weight_checked(jaccard)
weuclidean = _weight_checked(euclidean, const_test=False)
wsqeuclidean = _weight_checked(sqeuclidean, const_test=False)
Expand Down Expand Up @@ -533,6 +534,8 @@ def test_cdist_calling_conventions(self):
for metric in _METRICS_NAMES:
if verbose > 2:
print("testing: ", metric, " with: ", eo_name)
if metric == 'wminkowski':
continue
if metric in {'dice', 'yule', 'kulsinski', 'matching',
'rogerstanimoto', 'russellrao', 'sokalmichener',
'sokalsneath'} and 'bool' not in eo_name:
Expand Down Expand Up @@ -1294,6 +1297,8 @@ def test_pdist_calling_conventions(self):
# NOTE: num samples needs to be > than dimensions for mahalanobis
X = eo[eo_name][::5, ::2]
for metric in _METRICS_NAMES:
if metric == 'wminkowski':
continue
if verbose > 2:
print("testing: ", metric, " with: ", eo_name)
if metric in {'dice', 'yule', 'kulsinski', 'matching',
Expand Down Expand Up @@ -1391,6 +1396,24 @@ def test_minkowski(self):
assert_almost_equal(dist1p5, (1.0 + 2.0**1.5)**(2. / 3))
dist2 = wminkowski(x, y, p=2)

def test_old_wminkowski(self):
with suppress_warnings() as wrn:
wrn.filter(message="`wminkowski` is deprecated")
w = np.array([1.0, 2.0, 0.5])
for x, y in self.cases:
dist1 = old_wminkowski(x, y, p=1, w=w)
assert_almost_equal(dist1, 3.0)
dist1p5 = old_wminkowski(x, y, p=1.5, w=w)
assert_almost_equal(dist1p5, (2.0**1.5+1.0)**(2./3))
dist2 = old_wminkowski(x, y, p=2, w=w)
assert_almost_equal(dist2, np.sqrt(5))

# test weights Issue #7893
arr = np.arange(4)
w = np.full_like(arr, 4)
assert_almost_equal(old_wminkowski(arr, arr + 1, p=2, w=w), 8.0)
assert_almost_equal(wminkowski(arr, arr + 1, p=2, w=w), 4.0)

def test_euclidean(self):
for x, y in self.cases:
dist = weuclidean(x, y)
Expand Down

0 comments on commit 8caf958

Please sign in to comment.