forked from keon/algorithms
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add cosine similarity calculation. (keon#527)
* Add cosine distance calculation * Add cosine similarity to unit test and README * Fix some typo * Add docstring for cosine_similarity.py
- Loading branch information
1 parent
9cebd7a
commit 217f092
Showing
4 changed files
with
61 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,3 +14,4 @@ | |
from .rabin_miller import * | ||
from .rsa import * | ||
from .combination import * | ||
from .cosine_similarity import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
""" | ||
Calculate the cosine similarity between two given 1-D lists. | ||
The two lists must have the same length. | ||
Example: | ||
cosine_similarity([1, 1, 1], [1, 2, -1]) # output : 0.47140452079103173 | ||
""" | ||
import math | ||
|
||
|
||
def _l2_distance(vec):
    """
    Return the L2 (Euclidean) norm of a single vector.

    Despite the name, only one vector is involved: the result is the square
    root of the sum of the squared components of ``vec``. An empty ``vec``
    yields 0.0.

    :type vec: list
    :rtype: float
    """
    squared_total = 0.
    for component in vec:
        squared_total = squared_total + component * component
    return math.sqrt(squared_total)
|
||
|
||
def cosine_similarity(a, b):
    """
    Calculate the cosine similarity between two vectors.

    The result is dot(a, b) / (|a| * |b|), limited to the closed range
    [-1.0, 1.0].

    :type a: list
    :type b: list
    :rtype: float
    :raises ValueError: if the two vectors have different lengths
    :raises ZeroDivisionError: if the product of the two norms is zero
        (for example an empty or all-zero vector), in which case the
        similarity is undefined
    """
    if len(a) != len(b):
        raise ValueError("The two vectors must be the same length. Got shape "
                         + str(len(a)) + " and " + str(len(b)))

    # Check the denominator up front so the failure names the real cause
    # instead of surfacing as a bare "float division by zero". The exception
    # type is the one the plain division would have raised.
    denominator = _l2_distance(a) * _l2_distance(b)
    if denominator == 0:
        raise ZeroDivisionError(
            "Cosine similarity is undefined when the product of the vector "
            "norms is zero (empty or all-zero vector)")

    # Calculate the dot product of two vectors
    dot_product = 0.
    for ae, be in zip(a, b):
        dot_product += ae * be

    similarity = dot_product / denominator

    # Floating-point rounding can leave the quotient a hair outside [-1, 1],
    # which breaks consumers such as math.acos. Plain comparisons are used
    # rather than min()/max() so that a NaN result is passed through as NaN.
    if similarity > 1.0:
        similarity = 1.0
    elif similarity < -1.0:
        similarity = -1.0

    return similarity
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters