forked from ddbourgin/numpy-ml
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdistance_metrics.py
132 lines (97 loc) · 3.08 KB
/
distance_metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import numpy as np
def euclidean(x, y):
    r"""
    Compute the Euclidean (`L2`) distance between two real vectors.

    Notes
    -----
    The Euclidean distance between two vectors **x** and **y** is

    .. math::

        d(\mathbf{x}, \mathbf{y}) = \sqrt{ \sum_i (x_i - y_i)^2 }

    Both inputs are converted to floating point before they are subtracted,
    so unsigned or narrow integer inputs (e.g. ``uint8``) cannot wrap around.

    Parameters
    ----------
    x,y : :py:class:`ndarray <numpy.ndarray>` s of shape `(N,)`
        The two vectors to compute the distance between. Any array-like of
        real numbers (including plain lists) is accepted.

    Returns
    -------
    d : float
        The L2 distance between **x** and **y**.
    """
    # Cast first: `x - y` on unsigned/narrow integer dtypes wraps around
    # silently, and squaring the result can overflow as well.
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    return np.sqrt(np.sum(diff ** 2))
def manhattan(x, y):
    r"""
    Compute the Manhattan (`L1`) distance between two real vectors.

    Notes
    -----
    The Manhattan distance between two vectors **x** and **y** is

    .. math::

        d(\mathbf{x}, \mathbf{y}) = \sum_i |x_i - y_i|

    Both inputs are converted to floating point before they are subtracted,
    so unsigned or narrow integer inputs (e.g. ``uint8``) cannot wrap around.

    Parameters
    ----------
    x,y : :py:class:`ndarray <numpy.ndarray>` s of shape `(N,)`
        The two vectors to compute the distance between. Any array-like of
        real numbers (including plain lists) is accepted.

    Returns
    -------
    d : float
        The L1 distance between **x** and **y**.
    """
    # Cast first: with unsigned dtypes `x - y` wraps to a large positive
    # value, which `np.abs` cannot undo.
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    return np.sum(np.abs(diff))
def chebyshev(x, y):
    r"""
    Compute the Chebyshev (:math:`L_\infty`) distance between two real vectors.

    Notes
    -----
    The Chebyshev distance between two vectors **x** and **y** is

    .. math::

        d(\mathbf{x}, \mathbf{y}) = \max_i |x_i - y_i|

    Both inputs are converted to floating point before they are subtracted,
    so unsigned or narrow integer inputs (e.g. ``uint8``) cannot wrap around.

    Parameters
    ----------
    x,y : :py:class:`ndarray <numpy.ndarray>` s of shape `(N,)`
        The two vectors to compute the distance between. Any array-like of
        real numbers (including plain lists) is accepted.

    Returns
    -------
    d : float
        The Chebyshev distance between **x** and **y**.

    Raises
    ------
    ValueError
        If the vectors are empty, since the maximum of an empty set of
        coordinate differences is undefined.
    """
    # Cast first: with unsigned dtypes `x - y` wraps to a large positive
    # value, which would then be reported as the maximum.
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    return np.max(np.abs(diff))
def minkowski(x, y, p):
    r"""
    Compute the Minkowski-`p` distance between two real vectors.

    Notes
    -----
    The Minkowski-`p` distance between two vectors **x** and **y** is

    .. math::

        d(\mathbf{x}, \mathbf{y}) = \left( \sum_i |x_i - y_i|^p \right)^{1/p}

    In the limit :math:`p \to \infty` this becomes the Chebyshev distance
    :math:`\max_i |x_i - y_i|`, which is what is returned for ``p = inf``.

    Both inputs are converted to floating point before they are subtracted,
    so unsigned or narrow integer inputs (e.g. ``uint8``) cannot wrap around.

    Parameters
    ----------
    x,y : :py:class:`ndarray <numpy.ndarray>` s of shape `(N,)`
        The two vectors to compute the distance between. Any array-like of
        real numbers (including plain lists) is accepted.
    p : float
        The parameter of the distance function; use `p >= 1` for a valid
        metric. When `p = 1`, this is the `L1` distance, and when `p = 2`,
        this is the `L2` distance. For `p < 1`, Minkowski-`p` does not
        satisfy the triangle inequality and hence is not a valid distance
        metric.

    Returns
    -------
    d : float
        The Minkowski-`p` distance between **x** and **y**.
    """
    # Cast first: with unsigned dtypes `x - y` wraps to a large positive
    # value, which `np.abs` cannot undo.
    abs_diff = np.abs(np.asarray(x, dtype=float) - np.asarray(y, dtype=float))

    # The general formula degenerates at p = inf (the outer exponent becomes
    # 0, so the result would always be 1); use the limiting form instead.
    if p == np.inf:
        return np.max(abs_diff)

    return np.sum(abs_diff ** p) ** (1 / p)
def hamming(x, y):
    r"""
    Compute the Hamming distance between two integer-valued vectors.

    Notes
    -----
    The Hamming distance between two vectors **x** and **y** is

    .. math::

        d(\mathbf{x}, \mathbf{y}) = \frac{1}{N} \sum_i \mathbb{1}_{x_i \neq y_i}

    i.e. the fraction of positions at which the two vectors disagree.

    Parameters
    ----------
    x,y : :py:class:`ndarray <numpy.ndarray>` s of shape `(N,)`
        The two vectors to compute the distance between. Both vectors should be
        integer-valued. Any array-like (including plain lists) is accepted.

    Returns
    -------
    d : float
        The Hamming distance between **x** and **y**.

    Raises
    ------
    ValueError
        If **x** and **y** do not have the same shape.
    """
    # Convert up front: for plain Python sequences `x != y` is a single bool,
    # not an elementwise comparison, which would silently give a wrong count.
    x, y = np.asarray(x), np.asarray(y)

    if x.shape != y.shape:
        raise ValueError(
            f"x and y must have the same shape, got {x.shape} and {y.shape}"
        )

    return np.sum(x != y) / len(x)