Skip to content

Commit

Permalink
Add cosine similarity calculation. (keon#527)
Browse files Browse the repository at this point in the history
* Add cosine distance calculation

* Add cosine similarity to unit test and README

* Fix some typo

* Add docstring for cosine_similarity.py
  • Loading branch information
sanga-yoon authored and goswami-rahul committed Sep 30, 2019
1 parent 9cebd7a commit 217f092
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ If you want to uninstall algorithms, it is as simple as:
- [maths](algorithms/maths)
- [base_conversion](algorithms/maths/base_conversion.py)
- [combination](algorithms/maths/combination.py)
- [cosine_similarity](algorithms/maths/cosine_similarity.py)
- [decimal_to_binary_ip](algorithms/maths/decimal_to_binary_ip.py)
- [euler_totient](algorithms/maths/euler_totient.py)
- [extended_gcd](algorithms/maths/extended_gcd.py)
Expand Down
1 change: 1 addition & 0 deletions algorithms/maths/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@
from .rabin_miller import *
from .rsa import *
from .combination import *
from .cosine_similarity import *
42 changes: 42 additions & 0 deletions algorithms/maths/cosine_similarity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""
Calculate the cosine similarity between two given 1-D lists.
The two lists must have the same length.
Example:
cosine_similarity([1, 1, 1], [1, 2, -1]) # output : 0.47140452079103173
"""
import math


def _l2_distance(vec):
    """
    Return the L2 (Euclidean) norm of a single vector.

    The norm is the square root of the sum of the squared components.
    An empty vector yields 0.0.

    :type vec: list
    :rtype: float
    """
    # Start from a float so the running total is accumulated in floating point.
    squares_total = 0.
    for component in vec:
        squares_total = squares_total + component * component
    return math.sqrt(squares_total)


def cosine_similarity(a, b):
    """
    Calculate the cosine similarity between two vectors.

    The result is dot(a, b) / (norm(a) * norm(b)), where norm is the
    L2 norm computed by _l2_distance.

    :type a: list
    :type b: list
    :rtype: float
    :return: the similarity, limited to the closed interval [-1, 1]
    :raises ValueError: if the two vectors differ in length
    :raises ZeroDivisionError: if the product of the two norms is zero
        (e.g. an empty or all-zero vector), where the similarity is undefined
    """
    if len(a) != len(b):
        raise ValueError(
            "The two vectors must be the same length. Got shape {} and {}"
            .format(len(a), len(b)))

    denominator = _l2_distance(a) * _l2_distance(b)
    if denominator == 0:
        # Same exception type the bare division would raise, but with a
        # message that explains what went wrong.
        raise ZeroDivisionError(
            "Cosine similarity is undefined when the product of the "
            "vector norms is zero")

    # Calculate the dot product of the two vectors
    dot_product = 0.
    for ae, be in zip(a, b):
        dot_product += ae * be

    similarity = dot_product / denominator

    # Rounding error can push the quotient marginally outside [-1, 1], which
    # would break callers that feed it to math.acos. Plain comparisons are
    # used (rather than min/max) so that a NaN result passes through as NaN.
    if similarity > 1.0:
        return 1.0
    if similarity < -1.0:
        return -1.0
    return similarity
17 changes: 17 additions & 0 deletions tests/test_maths.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
encrypt, decrypt,
combination, combination_memo,
hailstone,
cosine_similarity,
)

import unittest
Expand Down Expand Up @@ -301,5 +302,21 @@ def test_hailstone(self):
self.assertEqual([10, 5, 16, 8, 4, 2, 1], hailstone.hailstone(10))


class TestCosineSimilarity(unittest.TestCase):
    """
    Tests for cosine_similarity from the file cosine_similarity.py.
    """

    def test_cosine_similarity(self):
        vec_a = [1, 1, 1]
        vec_b = [-1, -1, -1]
        vec_c = [1, 2, -1]
        # Identical direction -> 1, opposite direction -> -1.
        self.assertAlmostEqual(cosine_similarity(vec_a, vec_a), 1)
        self.assertAlmostEqual(cosine_similarity(vec_a, vec_b), -1)
        self.assertAlmostEqual(cosine_similarity(vec_a, vec_c), 0.4714045208)

    def test_orthogonal_vectors(self):
        # Perpendicular vectors have a zero dot product.
        self.assertAlmostEqual(cosine_similarity([1, 0], [0, 1]), 0)

    def test_length_mismatch_raises(self):
        # Vectors of different lengths are rejected up front.
        with self.assertRaises(ValueError):
            cosine_similarity([1, 2, 3], [1, 2])


# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()

0 comments on commit 217f092

Please sign in to comment.