From 1233e46754e9e22c9246fa4e60df6b4b51359c0e Mon Sep 17 00:00:00 2001 From: Yeuoly Date: Mon, 10 Mar 2025 13:44:33 +0800 Subject: [PATCH] fix: prevent infinite loop in text splitter when processing empty documents --- api/core/rag/splitter/text_splitter.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/api/core/rag/splitter/text_splitter.py b/api/core/rag/splitter/text_splitter.py index 34b4056cf5..a50d0d02d0 100644 --- a/api/core/rag/splitter/text_splitter.py +++ b/api/core/rag/splitter/text_splitter.py @@ -129,6 +129,13 @@ class TextSplitter(BaseDocumentTransformer, ABC): while total > self._chunk_overlap or ( total + _len + (separator_len if len(current_doc) > 0 else 0) > self._chunk_size and total > 0 ): + if current_doc and len(current_doc[0]) == 0: + current_doc = current_doc[1:] + continue + + if len(current_doc) == 0: + break + total -= self._length_function([current_doc[0]])[0] + ( separator_len if len(current_doc) > 1 else 0 )