diff --git a/api/core/rag/splitter/fixed_text_splitter.py b/api/core/rag/splitter/fixed_text_splitter.py index 53032b34d5..1f846e9518 100644 --- a/api/core/rag/splitter/fixed_text_splitter.py +++ b/api/core/rag/splitter/fixed_text_splitter.py @@ -65,8 +65,9 @@ class FixedRecursiveCharacterTextSplitter(EnhanceRecursiveCharacterTextSplitter) chunks = [text] final_chunks = [] - for chunk in chunks: - if self._length_function(chunk) > self._chunk_size: + chunks_lengths = self._length_function(chunks) + for chunk, chunk_length in zip(chunks, chunks_lengths): + if chunk_length > self._chunk_size: final_chunks.extend(self.recursive_split_text(chunk)) else: final_chunks.append(chunk) @@ -93,8 +94,8 @@ class FixedRecursiveCharacterTextSplitter(EnhanceRecursiveCharacterTextSplitter) # Now go merging things, recursively splitting longer texts. _good_splits = [] _good_splits_lengths = [] # cache the lengths of the splits - for s in splits: - s_len = self._length_function(s) + s_lens = self._length_function(splits) + for s, s_len in zip(splits, s_lens): if s_len < self._chunk_size: _good_splits.append(s) _good_splits_lengths.append(s_len)