-
Notifications
You must be signed in to change notification settings - Fork 5.3k
/
Copy pathtext_embeddings.py
40 lines (32 loc) · 1.01 KB
/
text_embeddings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from cosine_similarity import compute_cosine_similarity
from sentence_transformers import SentenceTransformer
# Load the sentence-transformers model identified by this name.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Two sentences about a barking dog, followed by two about eating pizza.
texts = [
    "The canine barked loudly.",
    "The dog made a noisy bark.",
    "He ate a lot of pizza.",
    "He devoured a large quantity of pizza pie.",
]

# Encode all sentences in a single call, then report the type and shape of
# whatever the model handed back.
text_embeddings = model.encode(texts)
print(type(text_embeddings))
print(text_embeddings.shape)

# Pair each sentence with its embedding so vectors can be looked up by text.
text_embeddings_dict = {
    sentence: vector for sentence, vector in zip(texts, text_embeddings)
}

# Give the individual sentences readable names (same order as ``texts``).
dog_text_1, dog_text_2, pizza_text_1, pizza_text_2 = texts

# Print the cosine similarity of the dog pair, then the pizza pair, then a
# cross-topic (dog vs. pizza) pair for contrast.
comparison_pairs = (
    (dog_text_1, dog_text_2),
    (pizza_text_1, pizza_text_2),
    (dog_text_1, pizza_text_1),
)
for left_text, right_text in comparison_pairs:
    print(
        compute_cosine_similarity(
            text_embeddings_dict[left_text], text_embeddings_dict[right_text]
        )
    )