Skip to content

Commit

Permalink
fix: 修复批量删除文档没有删除问题的缺陷
Browse files Browse the repository at this point in the history
--bug=1048687 --user=王孝刚 【知识库】删除文档不会删除文档中分段关联的问题 https://www.tapd.cn/57709429/s/1624544
  • Loading branch information
wxg0103 committed Dec 4, 2024
1 parent d9a46b6 commit c54bfc7
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 17 deletions.
20 changes: 13 additions & 7 deletions apps/dataset/serializers/document_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from celery_once import AlreadyQueued
from django.core import validators
from django.db import transaction
from django.db.models import QuerySet
from django.db.models import QuerySet, Count
from django.db.models.functions import Substr, Reverse
from django.http import HttpResponse
from drf_yasg import openapi
Expand Down Expand Up @@ -1091,11 +1091,17 @@ def file_to_paragraph(file, pattern_list: List, with_filter: bool, limit: int):


def delete_problems_and_mappings(document_ids):
    """Delete the problem mappings of the given documents and any orphaned problems.

    Every ``ProblemParagraphMapping`` row whose ``document_id`` is in
    ``document_ids`` is removed. A ``Problem`` that was referenced by one of
    those rows is removed as well, but only when no other mapping row still
    points at it afterwards.

    :param document_ids: iterable of document ids whose mappings are dropped.
    """
    mappings = ProblemParagraphMapping.objects.filter(document_id__in=document_ids)
    # Collect the candidate problem ids BEFORE deleting the mappings; the
    # queryset is lazy and would yield nothing once the rows are gone.
    problem_ids = set(mappings.values_list('problem_id', flat=True))
    mappings.delete()
    if not problem_ids:
        return

    # Problems still referenced by mappings outside the deleted documents
    # must be kept.
    still_referenced = set(
        ProblemParagraphMapping.objects.filter(problem_id__in=problem_ids).values_list('problem_id', flat=True))
    orphaned_problem_ids = problem_ids - still_referenced
    if orphaned_problem_ids:
        Problem.objects.filter(id__in=orphaned_problem_ids).delete()
29 changes: 19 additions & 10 deletions apps/dataset/serializers/paragraph_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from celery_once import AlreadyQueued
from django.db import transaction
from django.db.models import QuerySet
from django.db.models import QuerySet, Count
from drf_yasg import openapi
from rest_framework import serializers

Expand Down Expand Up @@ -291,7 +291,7 @@ def batch_delete(self, instance: Dict, with_valid=True):
self.is_valid(raise_exception=True)
paragraph_id_list = instance.get("id_list")
QuerySet(Paragraph).filter(id__in=paragraph_id_list).delete()
QuerySet(ProblemParagraphMapping).filter(paragraph_id__in=paragraph_id_list).delete()
delete_problems_and_mappings(paragraph_id_list)
update_document_char_length(self.data.get('document_id'))
# 删除向量库
delete_embedding_by_paragraph_ids(paragraph_id_list)
Expand Down Expand Up @@ -541,14 +541,7 @@ def delete(self, with_valid=False):
self.is_valid(raise_exception=True)
paragraph_id = self.data.get('paragraph_id')
Paragraph.objects.filter(id=paragraph_id).delete()

problem_id = ProblemParagraphMapping.objects.filter(paragraph_id=paragraph_id).values_list('problem_id',
flat=True).first()

if problem_id is not None:
if ProblemParagraphMapping.objects.filter(problem_id=problem_id).count() == 1:
Problem.objects.filter(id=problem_id).delete()
ProblemParagraphMapping.objects.filter(paragraph_id=paragraph_id).delete()
delete_problems_and_mappings([paragraph_id])

update_document_char_length(self.data.get('document_id'))
delete_embedding_by_paragraph(paragraph_id)
Expand Down Expand Up @@ -755,3 +748,19 @@ def batch_generate_related(self, instance: Dict, with_valid=True):
prompt)
except AlreadyQueued as e:
raise AppApiException(500, "任务正在执行中,请勿重复下发")


def delete_problems_and_mappings(paragraph_ids):
    """Delete the problem mappings of the given paragraphs and any orphaned problems.

    Every ``ProblemParagraphMapping`` row whose ``paragraph_id`` is in
    ``paragraph_ids`` is removed. A ``Problem`` that was referenced by one of
    those rows is removed as well, but only when no other mapping row still
    points at it afterwards.

    :param paragraph_ids: iterable of paragraph ids whose mappings are dropped.
    """
    mappings = ProblemParagraphMapping.objects.filter(paragraph_id__in=paragraph_ids)
    # Collect the candidate problem ids BEFORE deleting the mappings; the
    # queryset is lazy and would yield nothing once the rows are gone.
    problem_ids = set(mappings.values_list('problem_id', flat=True))
    mappings.delete()
    if not problem_ids:
        return

    # Problems still referenced by mappings of other paragraphs must be kept.
    still_referenced = set(
        ProblemParagraphMapping.objects.filter(problem_id__in=problem_ids).values_list('problem_id', flat=True))
    orphaned_problem_ids = problem_ids - still_referenced
    if orphaned_problem_ids:
        Problem.objects.filter(id__in=orphaned_problem_ids).delete()

0 comments on commit c54bfc7

Please sign in to comment.