1
1
# -*- coding: utf-8 -*-
2
2
"""
3
3
The :mod:`sklearn.metrics.pairwise` submodule implements utilities to evaluate
4
- pairwise distances or affinity of sets of samples.
4
+ pairwise distances, paired distances or affinity of sets of samples.
5
5
6
6
This module contains both distance metrics and kernels. A brief summary is
7
7
given on the two here.
@@ -101,6 +101,44 @@ def check_pairwise_arrays(X, Y):
101
101
return X , Y
102
102
103
103
104
+ def check_paired_arrays (X , Y ):
105
+ """ Set X and Y appropriately and check inputs for paired distances.
106
+
107
+ All paired distance metrics should use this function first to assert that
108
+ the given parameters are correct and safe to use.
109
+
110
+ Specifically, this function first ensures that both X and Y are arrays,
111
+ then checks that they are at least two dimensional while ensuring that
112
+ their elements are floats. Finally, the function checks that the sizes
113
+ of the dimensions of the two arrays are equal.
114
+
115
+ Parameters
116
+ ----------
117
+ X : {array-like, sparse matrix}, shape = [n_samples_a, n_features]
118
+
119
+ Y : {array-like, sparse matrix}, shape = [n_samples_b, n_features]
120
+
121
+ Returns
122
+ -------
123
+ safe_X : {array-like, sparse matrix}, shape = [n_samples_a, n_features]
124
+ An array equal to X, guaranteed to be a numpy array.
125
+
126
+ safe_Y : {array-like, sparse matrix}, shape = [n_samples_b, n_features]
127
+ An array equal to Y if Y was not None, guaranteed to be a numpy array.
128
+ If Y was None, safe_Y will be a pointer to X.
129
+
130
+ """
131
+ X , Y = check_pairwise_arrays (X , Y )
132
+ if X .shape != Y .shape :
133
+ raise ValueError ("X and Y should be of same shape. They were "
134
+ "respectively (%d, %d) and (%d, %d) long." % (
135
+ X .shape [0 ],
136
+ X .shape [1 ],
137
+ Y .shape [0 ],
138
+ Y .shape [1 ]))
139
+ return X , Y
140
+
141
+
104
142
# Pairwise distances
105
143
def euclidean_distances (X , Y = None , Y_norm_squared = None , squared = False ):
106
144
"""
@@ -146,6 +184,10 @@ def euclidean_distances(X, Y=None, Y_norm_squared=None, squared=False):
146
184
>>> euclidean_distances(X, [[0, 0]])
147
185
array([[ 1. ],
148
186
[ 1.41421356]])
187
+
188
+ See also
189
+ --------
190
+ paired_distances : distances between pairs of elements of X and Y.
149
191
"""
150
192
# should not need X_norm_squared because if you could precompute that as
151
193
# well as Y, then you should just pre-compute the output and not even
@@ -305,7 +347,7 @@ def cosine_distances(X, Y=None):
305
347
# Paired distances
306
348
def paired_euclidean_distances (X , Y ):
307
349
"""
308
- Computes the paired distances between X and Y
350
+ Computes the paired euclidean distances between X and Y
309
351
310
352
Parameters
311
353
----------
@@ -315,19 +357,27 @@ def paired_euclidean_distances(X, Y):
315
357
316
358
Returns
317
359
-------
318
- Distances : ndarray (n_samples, )
360
+ distances : ndarray (n_samples, )
319
361
"""
320
- if len (X ) != len (Y ):
321
- raise ValueError ("X and Y should be of same size. They were "
322
- "respectively %d and %d long." % (len (X ), len (Y )))
323
- X , Y = check_pairwise_arrays (X , Y )
362
+ X , Y = check_paired_arrays (X , Y )
363
+
324
364
return np .sqrt (((X - Y ) ** 2 ).sum (axis = - 1 ))
325
365
326
366
327
367
def paired_manhattan_distances (X , Y ):
328
- """ Compute the L1 distances between the vectors in X and Y.
368
+ """Compute the L1 distances between the vectors in X and Y.
369
+
370
+ Parameters
371
+ ----------
372
+ X : array-like, shape = [n_samples, n_features]
373
+
374
+ Y : array-like, shape = [n_samples, n_features]
375
+
376
+ Returns
377
+ -------
378
+ distances : ndarray (n_samples, )
329
379
"""
330
- X , Y = check_pairwise_arrays (X , Y )
380
+ X , Y = check_paired_arrays (X , Y )
331
381
return np .abs (X - Y ).sum (axis = - 1 )
332
382
333
383
@@ -340,24 +390,49 @@ def paired_manhattan_distances(X, Y):
340
390
}
341
391
342
392
343
- def paired_distances (X , Y , metric = "euclidean" ):
393
+ def paired_distances (X , Y , metric = "euclidean" , ** kwds ):
344
394
"""
395
+ Computes the paired distances between X and Y.
396
+
397
+ Computes the distances between (X[0], Y[0]), (X[1], Y[1]), etc...
345
398
346
399
Parameters
347
400
----------
348
401
X, Y : ndarray (n_samples, n_features)
349
402
350
403
metric : string or callable
404
+
405
+ The metric to use when calculating distance between instances in a
406
+ feature array. If metric is a string, it must be one of the options
407
+ specified in PAIRED_DISTANCES.
408
+ Alternatively, if metric is a callable function, it is called on each
409
+ pair of instances (rows) and the resulting value recorded. The callable
410
+ should take one row of X and the matching row of Y as input and return a value indicating
411
+ the distance between them.
351
412
352
413
Returns
353
414
-------
354
415
distances : ndarray (n_samples, )
416
+
417
+ Examples
418
+ --------
419
+ >>> from sklearn.metrics.pairwise import paired_distances
420
+ >>> X = [[0, 1], [1, 1]]
421
+ >>> Y = [[0, 1], [2, 1]]
422
+ >>> paired_distances(X, Y)
423
+ array([ 0., 1.])
424
+
425
+ See also
426
+ --------
427
+ pairwise_distances : pairwise distances.
355
428
"""
356
429
357
430
if metric in PAIRED_DISTANCES :
358
431
func = PAIRED_DISTANCES [metric ]
359
432
return func (X , Y )
360
433
elif callable (metric ):
434
+ # Check the matrix first (it is usually done by the metric)
435
+ X , Y = check_paired_arrays (X , Y )
361
436
distances = np .zeros (len (X ))
362
437
for i in range (len (X )):
363
438
distances [i ] = metric (X [i ], Y [i ])
0 commit comments