From a3f2c0563221a3fc2a18a75c3bc0ccb845897a57 Mon Sep 17 00:00:00 2001 From: ChasePassion <3047754883@qq.com> Date: Mon, 15 Sep 2025 09:41:16 +0800 Subject: [PATCH] optimize _merge_splits function by using zip instead of manual index tracking (#25680) Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- api/core/rag/splitter/text_splitter.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/api/core/rag/splitter/text_splitter.py b/api/core/rag/splitter/text_splitter.py index c5b6ac4608..bfa14e03d6 100644 --- a/api/core/rag/splitter/text_splitter.py +++ b/api/core/rag/splitter/text_splitter.py @@ -110,9 +110,7 @@ class TextSplitter(BaseDocumentTransformer, ABC): docs = [] current_doc: list[str] = [] total = 0 - index = 0 - for d in splits: - _len = lengths[index] + for d, _len in zip(splits, lengths): if total + _len + (separator_len if len(current_doc) > 0 else 0) > self._chunk_size: if total > self._chunk_size: logger.warning( @@ -134,7 +132,6 @@ class TextSplitter(BaseDocumentTransformer, ABC): current_doc = current_doc[1:] current_doc.append(d) total += _len + (separator_len if len(current_doc) > 1 else 0) - index += 1 doc = self._join_docs(current_doc, separator) if doc is not None: docs.append(doc)