From 78ca5ad1426af1a74144f78d2b33681c51cf013b Mon Sep 17 00:00:00 2001 From: wangxiaolei Date: Thu, 18 Dec 2025 16:50:44 +0800 Subject: [PATCH] fix: fix fixed_separator (#29861) --- api/core/rag/splitter/fixed_text_splitter.py | 3 ++- .../unit_tests/core/rag/splitter/test_text_splitter.py | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/api/core/rag/splitter/fixed_text_splitter.py b/api/core/rag/splitter/fixed_text_splitter.py index 801d2a2a52..e95c009292 100644 --- a/api/core/rag/splitter/fixed_text_splitter.py +++ b/api/core/rag/splitter/fixed_text_splitter.py @@ -2,6 +2,7 @@ from __future__ import annotations +import codecs import re from typing import Any @@ -52,7 +53,7 @@ class FixedRecursiveCharacterTextSplitter(EnhanceRecursiveCharacterTextSplitter) def __init__(self, fixed_separator: str = "\n\n", separators: list[str] | None = None, **kwargs: Any): """Create a new TextSplitter.""" super().__init__(**kwargs) - self._fixed_separator = fixed_separator + self._fixed_separator = codecs.decode(fixed_separator, "unicode_escape") self._separators = separators or ["\n\n", "\n", "。", ". ", " ", ""] def split_text(self, text: str) -> list[str]: diff --git a/api/tests/unit_tests/core/rag/splitter/test_text_splitter.py b/api/tests/unit_tests/core/rag/splitter/test_text_splitter.py index 7d246ac3cc..943a9e5712 100644 --- a/api/tests/unit_tests/core/rag/splitter/test_text_splitter.py +++ b/api/tests/unit_tests/core/rag/splitter/test_text_splitter.py @@ -901,6 +901,13 @@ class TestFixedRecursiveCharacterTextSplitter: # Verify no empty chunks assert all(len(chunk) > 0 for chunk in result) + def test_double_slash_n(self): + data = "chunk 1\n\nsubchunk 1.\nsubchunk 2.\n\n---\n\nchunk 2\n\nsubchunk 1\nsubchunk 2." + separator = "\\n\\n---\\n\\n" + splitter = FixedRecursiveCharacterTextSplitter(fixed_separator=separator) + chunks = splitter.split_text(data) + assert chunks == ["chunk 1\n\nsubchunk 1.\nsubchunk 2.", "chunk 2\n\nsubchunk 1\nsubchunk 2."] + # ============================================================================ # Test Metadata Preservation