From 3b8559521d5f9f1c28f79f28d018aa8be7793c01 Mon Sep 17 00:00:00 2001 From: GitHub Contributor Date: Fri, 26 Jun 2026 04:09:16 +0800 Subject: [PATCH] fix: use isolated session in _on_query to prevent premature commit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The _on_query method was calling db.session.commit() on the Flask-scoped SQLAlchemy session, which committed all pending dirty state from the current request — not just the DatasetQuery audit rows. This broke transaction isolation: if the downstream workflow failed, the subsequent db.session.rollback() could not revert the already-committed modifications (e.g. token deductions, partial node executions), leaving dirty data in the database. The same file already demonstrates the correct pattern in _on_retrieval_end, which uses sessionmaker(bind=db.engine).begin() with an independent session. This change applies the same approach to _on_query. Additionally fixed a latent bug where db.session.add_all(dataset_queries) was called inside the loop on every iteration, re-adding previously accumulated rows. Fixes #37886 --- api/core/rag/retrieval/dataset_retrieval.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index 474c9f90c78..fcaa815100c 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -1030,6 +1030,9 @@ class DatasetRetrieval: ): """ Persist dataset query audit rows for retrieval requests. + + Uses an independent session to avoid committing the request-scoped + db.session, which would break transaction isolation for the caller. 
""" if not query and not attachment_ids: return @@ -1059,9 +1062,9 @@ class DatasetRetrieval: created_by=created_by, ) dataset_queries.append(dataset_query) - if dataset_queries: - db.session.add_all(dataset_queries) - db.session.commit() + if dataset_queries: + with sessionmaker(bind=db.engine).begin() as session: + session.add_all(dataset_queries) def _retriever( self,