fix: dataset metadata counts when documents are deleted (#28305)

Signed-off-by: kenwoodjw <blackxin55+@gmail.com>
This commit is contained in:
kenwoodjw 2025-11-18 17:36:07 +08:00 committed by GitHub
parent e83c7438cb
commit a78bc507c0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 8 additions and 2 deletions

View File

@@ -9,7 +9,7 @@ from core.rag.index_processor.index_processor_factory import IndexProcessorFacto
from core.tools.utils.web_reader_tool import get_image_upload_file_ids from core.tools.utils.web_reader_tool import get_image_upload_file_ids
from extensions.ext_database import db from extensions.ext_database import db
from extensions.ext_storage import storage from extensions.ext_storage import storage
from models.dataset import Dataset, DocumentSegment from models.dataset import Dataset, DatasetMetadataBinding, DocumentSegment
from models.model import UploadFile from models.model import UploadFile
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -37,6 +37,11 @@ def batch_clean_document_task(document_ids: list[str], dataset_id: str, doc_form
if not dataset: if not dataset:
raise Exception("Document has no dataset") raise Exception("Document has no dataset")
db.session.query(DatasetMetadataBinding).where(
DatasetMetadataBinding.dataset_id == dataset_id,
DatasetMetadataBinding.document_id.in_(document_ids),
).delete(synchronize_session=False)
segments = db.session.scalars( segments = db.session.scalars(
select(DocumentSegment).where(DocumentSegment.document_id.in_(document_ids)) select(DocumentSegment).where(DocumentSegment.document_id.in_(document_ids))
).all() ).all()
@@ -71,6 +76,7 @@ def batch_clean_document_task(document_ids: list[str], dataset_id: str, doc_form
except Exception: except Exception:
logger.exception("Delete file failed when document deleted, file_id: %s", file.id) logger.exception("Delete file failed when document deleted, file_id: %s", file.id)
db.session.delete(file) db.session.delete(file)
db.session.commit() db.session.commit()
end_at = time.perf_counter() end_at = time.perf_counter()