mirror of https://github.com/langgenius/dify.git
optimize _merge_splits function by using enumerate (zip(splits, lengths) in the diff shown here) instead of manual index tracking (#25680)
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
parent
efcf052004
commit
a3f2c05632
|
|
@@ -110,9 +110,7 @@ class TextSplitter(BaseDocumentTransformer, ABC):
|
|||
docs = []
|
||||
current_doc: list[str] = []
|
||||
total = 0
|
||||
index = 0
|
||||
for d in splits:
|
||||
_len = lengths[index]
|
||||
for d, _len in zip(splits, lengths):
|
||||
if total + _len + (separator_len if len(current_doc) > 0 else 0) > self._chunk_size:
|
||||
if total > self._chunk_size:
|
||||
logger.warning(
|
||||
|
|
@@ -134,7 +132,6 @@ class TextSplitter(BaseDocumentTransformer, ABC):
|
|||
current_doc = current_doc[1:]
|
||||
current_doc.append(d)
|
||||
total += _len + (separator_len if len(current_doc) > 1 else 0)
|
||||
index += 1
|
||||
doc = self._join_docs(current_doc, separator)
|
||||
if doc is not None:
|
||||
docs.append(doc)
|
||||
|
|
|
|||
Loading…
Reference in New Issue