Skip to content

Commit

Permalink
fix: Optimize the logic of the double loop in process_group to improve QA speed.
Browse files Browse the repository at this point in the history
  • Loading branch information
xixihahaliu committed Mar 5, 2024
1 parent 9498082 commit 41b0aa4
Showing 1 changed file with 7 additions and 5 deletions.
12 changes: 7 additions & 5 deletions qanything_kernel/connector/database/milvus/milvus_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,14 +210,16 @@ def process_group(self, group):
file_name = group[0].metadata['file_name']
group_scores_map = {}
# 先找出该文件所有需要搜索的chunk_id
cand_chunks = []
cand_chunks_set = set() # 使用集合而不是列表
for cand_doc in group:
current_chunk_id = int(cand_doc.metadata['chunk_id'].split('_')[-1])
group_scores_map[current_chunk_id] = cand_doc.metadata['score']
for i in range(current_chunk_id - 200, current_chunk_id + 200):
need_search_id = file_id + '_' + str(i)
if need_search_id not in cand_chunks:
cand_chunks.append(need_search_id)
# 使用 set comprehension 一次性生成区间内所有可能的 chunk_id
chunk_ids = {file_id + '_' + str(i) for i in range(current_chunk_id - 200, current_chunk_id + 200)}
# 更新 cand_chunks_set 集合
cand_chunks_set.update(chunk_ids)

cand_chunks = list(cand_chunks_set)

group_relative_chunks = self.query_expr_async(expr=f"file_id == \"{file_id}\" and chunk_id in {cand_chunks}",
output_fields=["chunk_id", "content"])
Expand Down

0 comments on commit 41b0aa4

Please sign in to comment.