# Source: fast_sim_demo.py (85 lines, 75 loc, 2.73 KB), from a fork of shibing624/similarities.
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: Fast similarity search demo
"""
import os
import sys
sys.path.append('..')
from similarities.fastsim import AnnoySimilarity
from similarities.fastsim import HnswlibSimilarity
# Sentence pair whose similarity score each demo prints via ``model.similarity``.
sentences = [
    '如何更换花呗绑定银行卡',
    '花呗更改绑定银行卡',
]

# Base texts used by the demos: repeated and suffixed to build the initial
# corpus, and also passed unchanged to ``add_corpus``.
corpus = [
    '花呗更改绑定银行卡',
    '我什么时候开通了花呗',
    '俄罗斯警告乌克兰反对欧盟协议',
    '暴风雨掩埋了东北部;新泽西16英寸的降雪',
    '中央情报局局长访问以色列叙利亚会谈',
    '人在巴基斯坦基地的炸弹袭击中丧生',
    '我喜欢这首歌',
]
def annoy_demo():
    """Walk through the ``AnnoySimilarity`` workflow and print each result.

    Steps, in order:

    1. Create a model over an enlarged copy of the module-level ``corpus``
       and print the model.
    2. Print the similarity score of the two module-level ``sentences``.
    3. Call ``add_corpus(corpus)``, ``build_index()`` and
       ``save_index('annoy_model.bin')``, then print one query result.
    4. Discard that model, create a new one without a corpus, call
       ``load_index('annoy_model.bin')`` and print a single-query result
       followed by a two-query batch result (``topn=3``).

    Nothing is returned. The index saved under 'annoy_model.bin' is not
    removed afterwards.
    """
    index_file = 'annoy_model.bin'
    probe = "men喜欢这首歌"

    # Repeat the corpus ten times and append each text's position so that
    # every entry is a distinct string.
    enlarged_corpus = [f"{text}{pos}" for pos, text in enumerate(corpus * 10)]

    indexed = AnnoySimilarity(corpus=enlarged_corpus)
    print(indexed)

    left, right = sentences[0], sentences[1]
    pair_score = indexed.similarity(left, right)
    print(f"{left} vs {right}, score: {float(pair_score):.4f}")

    indexed.add_corpus(corpus)
    indexed.build_index()
    indexed.save_index(index_file)
    print(indexed.most_similar(probe))

    # Semantic search, batch mode: drop the first model before the second one
    # is created, then query the new model after load_index().
    del indexed
    restored = AnnoySimilarity()
    restored.load_index(index_file)
    print(restored.most_similar(probe))

    queries = ["如何更换花呗绑定银行卡", probe]
    batch_hits = restored.most_similar(queries, topn=3)
    print(batch_hits)
    # The batch result is keyed by the query's position in ``queries``; each
    # value maps a corpus id to its score.
    for query_pos, hits in batch_hits.items():
        print('query:', queries[query_pos])
        print("search top 3:")
        for doc_id, score in hits.items():
            print(f'\t{restored.corpus[doc_id]}: {score:.4f}')

    print('-' * 50 + '\n')
def hnswlib_demo():
    """Walk through the ``HnswlibSimilarity`` workflow and print each result.

    Steps, in order:

    1. Build an enlarged copy of the module-level ``corpus`` and print it.
    2. Create a model over that list and print the model.
    3. Print the similarity score of the two module-level ``sentences``.
    4. Call ``add_corpus(corpus)``, ``build_index()`` and
       ``save_index('hnsw_model.bin')``, then print one query result.
    5. Discard that model, create a new one without a corpus, call
       ``load_index('hnsw_model.bin')`` and print a single-query result
       followed by a two-query batch result (``topn=3``).

    Nothing is returned. The index saved under 'hnsw_model.bin' is not
    removed afterwards.
    """
    index_file = 'hnsw_model.bin'
    probe = "men喜欢这首歌"

    # Repeat the corpus ten times and append each text's position so that
    # every entry is a distinct string.
    enlarged_corpus = [f"{text}{pos}" for pos, text in enumerate(corpus * 10)]
    print(enlarged_corpus)

    indexed = HnswlibSimilarity(corpus=enlarged_corpus)
    print(indexed)

    left, right = sentences[0], sentences[1]
    pair_score = indexed.similarity(left, right)
    print(f"{left} vs {right}, score: {float(pair_score):.4f}")

    indexed.add_corpus(corpus)
    indexed.build_index()
    indexed.save_index(index_file)
    print(indexed.most_similar(probe))

    # Semantic search, batch mode: drop the first model before the second one
    # is created, then query the new model after load_index().
    del indexed
    restored = HnswlibSimilarity()
    restored.load_index(index_file)
    print(restored.most_similar(probe))

    queries = ["如何更换花呗绑定银行卡", probe]
    batch_hits = restored.most_similar(queries, topn=3)
    print(batch_hits)
    # The batch result is keyed by the query's position in ``queries``; each
    # value maps a corpus id to its score.
    for query_pos, hits in batch_hits.items():
        print('query:', queries[query_pos])
        print("search top 3:")
        for doc_id, score in hits.items():
            print(f'\t{restored.corpus[doc_id]}: {score:.4f}')

    print('-' * 50 + '\n')
# Script entry point: run the Annoy demo first, then the Hnswlib demo.
if __name__ == '__main__':
    for run_demo in (annoy_demo, hnswlib_demo):
        run_demo()