From d152d63e7dbeb67d33cce4ed9ba5035114c15cdd Mon Sep 17 00:00:00 2001 From: wangxiaolei Date: Thu, 11 Dec 2025 09:47:39 +0800 Subject: [PATCH] =?UTF-8?q?chore:=20update=20remove=5Fleading=5Fsymbols=20?= =?UTF-8?q?pattern,=20keep=20=E3=80=90=20(#29419)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- api/core/tools/utils/text_processing_utils.py | 2 +- api/tests/unit_tests/utils/test_text_processing.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/api/core/tools/utils/text_processing_utils.py b/api/core/tools/utils/text_processing_utils.py index 80c69e94c8..0f9a91a111 100644 --- a/api/core/tools/utils/text_processing_utils.py +++ b/api/core/tools/utils/text_processing_utils.py @@ -13,5 +13,5 @@ def remove_leading_symbols(text: str) -> str: """ # Match Unicode ranges for punctuation and symbols # FIXME this pattern is confused quick fix for #11868 maybe refactor it later - pattern = r"^[\u2000-\u206F\u2E00-\u2E7F\u3000-\u303F\"#$%&'()*+,./:;<=>?@^_`~]+" + pattern = r'^[\[\]\u2000-\u2025\u2027-\u206F\u2E00-\u2E7F\u3000-\u300F\u3011-\u303F"#$%&\'()*+,./:;<=>?@^_`~]+' return re.sub(pattern, "", text) diff --git a/api/tests/unit_tests/utils/test_text_processing.py b/api/tests/unit_tests/utils/test_text_processing.py index 8af47e8967..11e017464a 100644 --- a/api/tests/unit_tests/utils/test_text_processing.py +++ b/api/tests/unit_tests/utils/test_text_processing.py @@ -14,6 +14,7 @@ from core.tools.utils.text_processing_utils import remove_leading_symbols ("Hello, World!", "Hello, World!"), ("", ""), (" ", " "), + ("【测试】", "【测试】"), ], ) def test_remove_leading_symbols(input_text, expected_output):